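/*
 * conv.c (w3mmee) -- character encoding conversion helpers.
 * Listing captured from a web source browser; the page navigation text
 * ("Search packages", "File versions", "Download package") was not code.
 */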

#include <stdio.h>
#include <string.h>
#include "fm.h"

#ifdef JP_CHARSET
#include "terms.h"
#include "Str.h"

#ifdef DEBUG
#include <malloc.h>
#endif                        /* DEBUG */

/* Short aliases for the unsigned integer types used in this file. */
#define     uchar       unsigned char
#define ushort          unsigned short
#define uint            unsigned int

/* Drop any TRUE / FALSE / ESC_CODE inherited from headers and pin local values. */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
#define     TRUE        1
#define     FALSE       0
#ifdef ESC_CODE
#undef ESC_CODE
#endif
#define ESC_CODE  '\033'

/*
 * Split a detector status value: CODE_STATE() keeps the low nibble, the
 * per-encoding *_STATE() macros keep the high nibble (the parser state
 * that JA_CES_find1() switches on).
 */
#define CODE_STATE(c)   ((c) & 0x0f)
#define EUC_STATE(c)    ((c) & 0xf0)
#define SJIS_STATE(c)   ((c) & 0xf0)
#define ISO_STATE(c)    ((c) & 0xf0)

/* Values stored in the `cset' field of a JA_CES_conv_t by the converters. */
#define CSET_ASCII      0
#define CSET_X0208      1
#define CSET_X0201K     2
#define CSET_UNKNOWN    3

/*
 * Escape sequences for the 7-bit JIS variants listed in FromToEJ[].
 * The *SIcode strings ("ESC $ x") switch to the double-byte set, the
 * *SOcode strings ("ESC ( x") switch back to a single-byte set.
 * USIcode / USOcode are not referenced in the code visible in this file.
 */
#define     JSIcode  "\033$@"
#define     JSOcode  "\033(H"
#define     J2SIcode "\033$@"
#define     J2SOcode "\033(J"
#define     NSIcode  "\033$B"
#define     NSOcode  "\033(J"
#define     N2SIcode  "\033$B"
#define     N2SOcode  "\033(B"
#define     N3SIcode "\033$@"
#define     N3SOcode "\033(B"
#define     USIcode  "\033$"
#define     USOcode  "\033+"
/* Length in bytes of the longest shift sequence above. */
#define SIOcode_LEN_MAX (3)

static int cConvEJ(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left);
static int cConvES(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left);
static int cConvSE(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left);
static int cConvJE(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left);
char checkShiftCode(Str buf, uchar hint);

/*
 * Half-width katakana (JIS X 0201-Kana) to double-byte replacement table.
 *
 * 64 entries.  The converters index it with (byte - 0xa0) for 8-bit kana
 * and (byte - 0x20) for 7-bit kana.  Each entry holds the two 7-bit bytes
 * of the replacement character; callers OR in 0x80 when they need the
 * EUC form or pass the pair to put_sjis() for Shift-JIS.
 */
static char *han2zen_tab[] =
{
    "!!", "!#", "!V", "!W", "!\"", "!&", "%r", "%!",
    "%#", "%%", "%'", "%)", "%c", "%e", "%g", "%C",
    "!<", "%\"", "%$", "%&", "%(", "%*", "%+", "%-",
    "%/", "%1", "%3", "%5", "%7", "%9", "%;", "%=",
    "%?", "%A", "%D", "%F", "%H", "%J", "%K", "%L",
    "%M", "%N", "%O", "%R", "%U", "%X", "%[", "%^",
    "%_", "%`", "%a", "%b", "%d", "%f", "%h", "%i",
    "%j", "%k", "%l", "%m", "%o", "%s", "!+", "!,",
};

/*
 * One row of the converter table FromToEJ[].
 */
typedef struct _ConvRoutine {
  /* Charset code character this row applies to ('\0' ends the table). */
  char key;
  /* Converter handed out by getConvFromEJRoutine(). */
  int (*from)(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left);
  /* Converter handed out by getConvToEJRoutine(). */
  int (*to)(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left);
  /* Initial conversion state, copied to the caller by both lookup functions. */
  JA_CES_conv_t cd;
} ConvRoutine;

/*
 * Per-key lookup cache for FromToEJ[], one slot for every possible char
 * value.  It starts zeroed and is filled lazily by the getConv*Routine()
 * functions; a zero slot means no converter is known for that key.
 */
static char ConvRoutineKeyMap[1 << CHAR_BIT] = {
  '\0',
};

/*
 * Converter table: charset key, "from EUC" routine, "to EUC" routine and
 * the initial JA_CES_conv_t for that charset.  The initializer supplies,
 * in order, the starting character set, the two escape-sequence lengths
 * and the two escape strings (presumably cset, silen, solen, ShiftIn,
 * ShiftOut -- confirm against the JA_CES_conv_t declaration).
 *
 * For CODE_EUC both directions use cConvEJ: with an empty ShiftIn string
 * that routine emits 8-bit bytes and never writes an escape sequence.
 * The row with key '\0' terminates the table.
 */
static ConvRoutine FromToEJ[] =
{
    {CODE_JIS_J, cConvEJ, cConvJE, {CSET_ASCII, sizeof(JSIcode) - 1, sizeof(JSOcode) - 1, JSIcode, JSOcode}},
    {CODE_JIS_N, cConvEJ, cConvJE, {CSET_ASCII, sizeof(NSIcode) - 1, sizeof(NSOcode) - 1, NSIcode, NSOcode}},
    {CODE_JIS_n, cConvEJ, cConvJE, {CSET_ASCII, sizeof(N2SIcode) - 1, sizeof(N2SOcode) - 1, N2SIcode, N2SOcode}},
    {CODE_JIS_m, cConvEJ, cConvJE, {CSET_ASCII, sizeof(N3SIcode) - 1, sizeof(N3SOcode) - 1, N3SIcode, N3SOcode}},
    {CODE_JIS_j, cConvEJ, cConvJE, {CSET_ASCII, sizeof(J2SIcode) - 1, sizeof(J2SOcode) - 1, J2SIcode, J2SOcode}},
    {CODE_SJIS,  cConvES, cConvSE, {CSET_ASCII, 0, 0, "", ""}},
    {CODE_EUC,   cConvEJ, cConvEJ, {CSET_ASCII, 0, 0, "", ""}},
    {'\0', NULL, NULL, {'\0', 0, 0, NULL, NULL}}
};

int
getConvToEJRoutine(char key,
               int (**p_routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left),
               JA_CES_conv_t *cd)
{
  int i;

  if (!ConvRoutineKeyMap[CODE_EUC])
    for (i = 1 ; FromToEJ[i].key ; ++i)
      ConvRoutineKeyMap[(int)FromToEJ[i].key] = i;

  if ((i = ConvRoutineKeyMap[(int)(unsigned char)key])) {
    *p_routine = FromToEJ[i].to;
    *cd = FromToEJ[i].cd;
  }

  return i;
}

int
getConvFromEJRoutine(char key,
                 int (**p_routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left),
                 JA_CES_conv_t *cd)
{
  int i;

  if (!ConvRoutineKeyMap[CODE_EUC])
    for (i = 1 ; FromToEJ[i].key ; ++i)
      ConvRoutineKeyMap[(int)FromToEJ[i].key] = i;

  if ((i = ConvRoutineKeyMap[(int)(unsigned char)key])) {
    *p_routine = FromToEJ[i].from;
    *cd = FromToEJ[i].cd;
  }

  return i;
}

const char *
GetSICode(char key)
{
  int (*routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left);
  JA_CES_conv_t cd;

  if (getConvFromEJRoutine(key, &routine, &cd))
    return cd.ShiftIn;
  else
    return "";
}

const char *
GetSOCode(char key)
{
  int (*routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left);
  JA_CES_conv_t cd;

  if (getConvFromEJRoutine(key, &routine, &cd))
    return cd.ShiftOut;
  else
    return "";
}

/*
 * Report an unsupported charset code on stderr and terminate via
 * w3m_exit(1).
 *
 * The hex value is printed from the unsigned byte: passing a plain
 * (possibly negative) char to %x would print a sign-extended value such
 * as ffffff8e instead of 8e.
 */
static void
n_impr(char s)
{
    fprintf(stderr, "conv: option %c(0x%02x) is not implemented yet... sorry\n",
            s, (unsigned int)(unsigned char)s);
    w3m_exit(1);
}

/*
 * Run `conv_func' over the whole of `is' and return the converted text
 * as a new Str.
 *
 *   is            input string
 *   conv_func     converter; returns the number of bytes it wrote and
 *                 updates the from/to cursors and remaining counts
 *   cd            conversion state shared across calls
 *   ascii_on_eof  when non-zero, append cd->ShiftOut if the stream does
 *                 not end in the ASCII character set
 *
 * The converter is called repeatedly.  When a call writes nothing and
 * consumes nothing, the output buffer is enlarged and the call retried
 * once.  If that retry makes no progress either, the remaining input
 * cannot be converted (for instance a multi-byte character cut off at the
 * end of the data) and is dropped.  The previous loop retried forever in
 * that situation, and also when the final bytes were consumed without
 * producing output.
 */
static Str
conv_apply(Str is,
         int (*conv_func)(JA_CES_conv_t *, const char **, int *, char **, int *),
         JA_CES_conv_t *cd,
         int ascii_on_eof)
{
  Str os;
  const char *from;
  char *to;
  int from_left, to_left;
  int stalled = 0;

  from = (const char *)is->ptr;
  from_left = is->length;
  os = Strnew_size(from_left);
  to = os->ptr;
  to_left = os->area_size - 1;        /* keep one byte for the terminator */

  while (from_left > 0) {
    int before = from_left;

    if (conv_func(cd, &from, &from_left, &to, &to_left) || from_left != before) {
      stalled = 0;            /* wrote or consumed something */
      continue;
    }

    if (stalled)
      break;                  /* more room did not help: unconvertible tail */

    stalled = 1;
    os->length = to - os->ptr;
    /*
     * Ask for a shift sequence plus one double-byte character.
     * Assumes Strassure(s, n) makes room for at least n more bytes --
     * TODO confirm against Str.h.
     */
    Strassure(os, SIOcode_LEN_MAX + 2);
    to = &os->ptr[os->length];
    to_left = os->area_size - 1 - os->length;
  }

  if (ascii_on_eof && cd->solen && cd->cset != CSET_ASCII) {
    os->length = to - os->ptr;
    Strassure(os, cd->solen);
    memcpy(&os->ptr[os->length], cd->ShiftOut, cd->solen);
    to = &os->ptr[os->length + cd->solen];
  }

  *to = '\0';
  os->length = to - os->ptr;
  return os;
}

/*
 * Convert `is' from charset `fc' to charset `tc' by way of internal EUC.
 *
 * Returns `is' itself when the two codes are equal or either one is
 * CODE_ASCII.  An unknown charset code is reported through n_impr();
 * NULL is returned should that call come back.
 */
Str
conv_str(Str is, char fc, char tc)
{
    int (*routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left);
    JA_CES_conv_t state;
    Str inner;

    if (fc == tc || fc == CODE_ASCII || tc == CODE_ASCII)
      return is;

    /* Step 1: bring the text to internal EUC unless it already is. */
    inner = is;
    if (fc != CODE_INNER_EUC) {
      if (!getConvToEJRoutine(fc, &routine, &state)) {
          n_impr(fc);
          return NULL;
      }
      inner = conv_apply(is, routine, &state, 0);
    }

    /* Step 2: EUC targets need no further work. */
    if (tc == CODE_INNER_EUC || tc == CODE_EUC)
      return inner;

    if (!getConvFromEJRoutine(tc, &routine, &state)) {
      n_impr(tc);
      return NULL;
    }

    /* Final stage: make sure the output ends in the ASCII set. */
    return conv_apply(inner, routine, &state, 1);
}

/*
 * C-string front end for conv_str(): wraps `is' in a Str built with
 * Strnew_charp() and converts it from `fc' to `tc'.
 */
Str
conv(char *is, char fc, char tc)
{
    Str wrapped = Strnew_charp(is);

    return conv_str(wrapped, fc, tc);
}

/*
 * Convert Shift-JIS to EUC-JP
 */

/*
 * Decode the second byte of a Shift-JIS pair.
 *
 * *ub holds the value derived from the first byte; it is doubled here
 * and reduced by one when the second byte is below 0x9f.  The return
 * value is the second byte rebased: (byte - 0x9e) for bytes from 0x9f
 * up, otherwise (byte - 0x3f) after closing the gap above 0x7e.
 */
static uchar
getSLb(const uchar *ptr, uchar *ub)
{                       /* Get Shift-JIS Lower byte */
    uchar trail = *ptr;

    *ub = (uchar)(*ub << 1);

    if (trail >= 0x9f)
      return (uchar)(trail - 0x9e);

    *ub = (uchar)(*ub - 1);

    if (trail > 0x7e)
      --trail;

    return (uchar)(trail - 0x3f);
}

/*
 * cConvSE -- convert Shift-JIS bytes to EUC-JP.
 *
 *   cd         conversion state (not used by this routine)
 *   from       in/out: input cursor
 *   from_left  in/out: bytes of input remaining
 *   to         in/out: output cursor
 *   to_left    in/out: bytes of output space remaining
 *
 * Returns the number of bytes written during this call.
 *
 * `p' is the read position, `fb' the position up to which input has been
 * fully handled.  Only `fb' is reported back, so a character whose
 * second byte has not been seen, or whose output did not fit, is offered
 * again on the next call.
 */
int
cConvSE(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left)
{
  const uchar *fb, *fe, *p;
  uchar *tb, *te, ub = '\0';
  int state = JA_SJIS_NOSTATE, res;

  p = fb = (const uchar *)*from;
  fe = &fb[*from_left];
  tb = (uchar *)*to;
  te = &tb[*to_left];

  /* Stop at end of input or as soon as the output window is full. */
  while (p < fe && tb < te)
    switch (state) {
    case JA_SJIS_NOSTATE:
      if (!(*p & 0x80)) { /* ASCII */
      *tb++ = *p++;
      fb = p;
      }
      else if (0x81 <= *p && *p <= 0x9f) {      /* JIS X 0208, 0213 */
      ub = *p++ & 0x7f;
      state = JA_SJIS_SHIFT_L;
      }
      else if (0xe0 <= *p && *p <= 0xef   /* JIS X 0208 */
             /* *p <= 0xfc */ /* JIS X 0213 */
             ) {
      ub = (*p++ & 0x7f) - 0x40;
      state = JA_SJIS_SHIFT_H;
      }
      else if (0xa0 <= *p && *p <= 0xdf) {      /* JIS X 0201-Kana */
      if (te - tb < 2)
        goto end;

      /* Replace half-width kana by the double-byte form from the table. */
      *tb++ = han2zen_tab[*p - 0xa0][0] | 0x80;
      *tb++ = han2zen_tab[*p++ - 0xa0][1] | 0x80;
      fb = p;
      }
      else
      fb = ++p; /* broken */

      break;
    case JA_SJIS_SHIFT_L:
    case JA_SJIS_SHIFT_H:
      if ((0x40 <= *p && *p <= 0x7e) ||
        (0x80 <= *p && *p <= 0xfc)) {     /* JIS X 0208, 0213 */
      uchar lb;

      if (te - tb < 2)
        goto end;

      /* getSLb() also adjusts ub; both get 0x20 added and bit 7 set. */
      lb = getSLb(p++, &ub);
      *tb++ = (ub + 0x20) | 0x80;
      *tb++ = (lb + 0x20) | 0x80;
      }
      else if (!(*p & 0x80))  /* broken ? */
      *tb++ = *p++;
      else
      ++p; /* broken */

      fb = p;
      state = JA_SJIS_NOSTATE;
      break;
    }
end:
  /* Publish how far output and input advanced. */
  res = tb - (unsigned char *)*to;
  *to = (char *)tb;
  *to_left -= res;
  *from_left = fe - fb;
  *from = (const char *)fb;
  return res;
}

/*
 * Convert ISO-2022-JP to EUC-JP
 */

/*
 * cConvJE -- convert ISO-2022-JP style 7-bit JIS bytes to EUC-JP.
 *
 *   cd         conversion state; cd->cset tracks the character set
 *              selected by the last recognised escape sequence and is
 *              kept across calls
 *   from       in/out: input cursor
 *   from_left  in/out: bytes of input remaining
 *   to         in/out: output cursor
 *   to_left    in/out: bytes of output space remaining
 *
 * Returns the number of bytes written during this call.
 *
 * `p' is the read position, `fb' the position up to which input has been
 * fully handled.  Only `fb' is reported back, so an escape sequence or
 * double-byte character that is incomplete, or whose output did not fit,
 * is offered again on the next call.
 *
 * Recognised escapes: ESC ( B / J / H select ASCII, ESC ( I selects
 * JIS X 0201-Kana, ESC $ @ / B and ESC $ ( @ / B select JIS X 0208.
 * Any other escape sequence is copied to the output unchanged.
 */
int
cConvJE(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left)
{
  const uchar *fb, *fe, *p;
  uchar *tb, *te, ub = '\0';
  int state = JA_ISO_NOSTATE, res;

  p = fb = (const uchar *)*from;
  fe = &fb[*from_left];
  tb = (uchar *)*to;
  te = &tb[*to_left];

  while (p < fe && tb < te)
    switch (state) {
    case JA_ISO_NOSTATE:
      if (*p == ESC_CODE) {   /* ESC sequence */
        state = JA_ISO_ESC;
        ++p;
      }
      else if (cd->cset == CSET_ASCII || *p < 0x21) {
        *tb++ = *p++;
        fb = p;
      }
      else if (cd->cset == CSET_X0208 && *p <= 0x7e) {
        /* JIS X 0208: remember the first byte, wait for the second */
        ub = *p++;
        state = JA_ISO_MBYTE1;
      }
      else if (cd->cset == CSET_X0201K && *p <= 0x5f) { /* JIS X 0201-Kana */
        if (te - tb < 2)
          goto end;

        *tb++ = han2zen_tab[*p - 0x20][0] | 0x80;
        *tb++ = han2zen_tab[*p++ - 0x20][1] | 0x80;
        fb = p;
      }
      else
        fb = ++p;             /* broken */

      break;
    case JA_ISO_MBYTE1:
      if (*p == ESC_CODE) {   /* ESC sequence */
        state = JA_ISO_ESC;
        ++p;
      }
      else if (0x21 <= *p && *p <= 0x7e) {      /* JIS X 0208 */
        if (te - tb < 2)
          goto end;

        *tb++ = ub | 0x80;
        *tb++ = *p++ | 0x80;
        fb = p;
        state = JA_ISO_NOSTATE;
      }
      else {
        /* Not a valid second byte: drop the first, copy this one. */
        *tb++ = *p++;
        fb = p;
        state = JA_ISO_NOSTATE;
      }

      break;
    case JA_ISO_ESC:
      if (*p == '(') {        /* ESC ( F */
        state = JA_ISO_CS94;
        ++p;
      }
      else if (*p == '$') {   /* ESC $ F, ESC $ ( F */
        state = JA_ISO_MBCS;
        ++p;
      }
      else {
        /* Not a designation: pass ESC and the byte through. */
        if (te - tb < 2)
          goto end;

        *tb++ = ESC_CODE;
        *tb++ = *p++;
        fb = p;
        state = JA_ISO_NOSTATE;
      }

      break;
    case JA_ISO_CS94:
      if (*p == 'B' || *p == 'J' || *p == 'H')
        cd->cset = CSET_ASCII;
      else if (*p == 'I')
        cd->cset = CSET_X0201K;
      else {
        /* Unknown final byte: pass the whole sequence through. */
        if (te - tb < 3)
          goto end;

        *tb++ = ESC_CODE;
        *tb++ = '(';
        *tb++ = *p;
      }

      fb = ++p;
      state = JA_ISO_NOSTATE;
      break;
    case JA_ISO_MBCS:
      if (*p == '(') {        /* ESC $ ( F */
        state = JA_ISO_MBCS | JA_ISO_CS94;
        ++p;
        break;
      }
      /* fall through: ESC $ F shares the final-byte handling */
    case JA_ISO_MBCS | JA_ISO_CS94:
      if (*p == 'B' || *p == '@')
        cd->cset = CSET_X0208;
      else {
        /*
         * Unknown final byte: pass the whole sequence through.  The
         * bytes must go through the write cursor `tb'; storing them
         * through the end pointer `te' wrote past the output window and
         * lost the sequence.
         */
        if (te - tb < 3 + (state == (JA_ISO_MBCS | JA_ISO_CS94)))
          goto end;

        *tb++ = ESC_CODE;
        *tb++ = '$';

        if (state == (JA_ISO_MBCS | JA_ISO_CS94))
          *tb++ = '(';

        *tb++ = *p;
      }

      fb = ++p;
      state = JA_ISO_NOSTATE;
      break;
    }
end:
  /* Publish how far output and input advanced. */
  res = tb - (unsigned char *)*to;
  *to = (char *)tb;
  *to_left -= res;
  *from_left = fe - fb;
  *from = (const char *)fb;
  return res;
}

/*
 * cConvEJ -- convert EUC-JP bytes to 7-bit JIS, or to normalised EUC-JP.
 *
 *   cd         conversion state.  When cd->ShiftIn is an empty string
 *              the output stays 8-bit and no escape sequences are
 *              written; otherwise cd->ShiftIn / cd->ShiftOut are written
 *              whenever the output switches between double-byte and
 *              ASCII, with cd->cset tracking the current set across
 *              calls.
 *   from       in/out: input cursor
 *   from_left  in/out: bytes of input remaining
 *   to         in/out: output cursor
 *   to_left    in/out: bytes of output space remaining
 *
 * Returns the number of bytes written during this call.
 *
 * `p' is the read position, `fb' the position up to which input has been
 * fully handled.  Only `fb' is reported back, so a character that is
 * incomplete or did not fit is offered again on the next call.  An
 * escape sequence that was already written stays written, and cd->cset
 * records that.
 *
 * Half-width kana after SS2 are replaced through han2zen_tab; the two
 * bytes following SS3 are skipped.
 */
int
cConvEJ(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left)
{
  const uchar *fb, *fe, *p;
  uchar *tb, *te, ub = '\0';
  int state = JA_EUC_NOSTATE, res, euc;

  p = fb = (const uchar *)*from;
  fe = &fb[*from_left];
  tb = (uchar *)*to;
  te = &tb[*to_left];
  euc = *cd->ShiftIn ? 0 : 0x80;      /* 0x80: keep bit 7, no escapes */

  while (p < fe && tb < te)
    switch (state) {
    case JA_EUC_NOSTATE:
      if (!(*p & 0x80)) {     /* ASCII */
        if (!euc && cd->cset != CSET_ASCII) {
          if (te - tb < cd->solen)
            goto end;

          memcpy(tb, cd->ShiftOut, cd->solen);
          tb += cd->solen;
          cd->cset = CSET_ASCII;

          /*
           * The escape may have used the last free byte.  Stop here
           * instead of storing the character past the output window;
           * it is read again on the next call.
           */
          if (tb >= te)
            goto end;
        }

        *tb++ = *p++;
        fb = p;
      }
      else if (0xa1 <= *p && *p <= 0xfe) {      /* JIS X 0208, 0213-1 */
        ub = *p++;
        state = JA_EUC_MBYTE1;
      }
      else if (*p == EUC_SS2_CODE) {      /* SS2 + JIS X 0201-Kana */
        state = JA_EUC_SS2;
        ++p;
      }
      else if (*p == EUC_SS3_CODE) {      /* SS3 + JIS X 0212, 0213-2 */
        state = JA_EUC_SS3;
        ++p;
      }
      else
        fb = ++p;             /* broken */

      break;
    case JA_EUC_MBYTE1:
      if (0xa1 <= *p && *p <= 0xfe) {     /* JIS X 0208, 0213-1 */
        if (!euc && cd->cset != CSET_X0208) {
          if (te - tb < cd->silen)
            goto end;

          memcpy(tb, cd->ShiftIn, cd->silen);
          tb += cd->silen;
          cd->cset = CSET_X0208;
        }

        if (te - tb < 2)
          goto end;

        *tb++ = (ub & 0x7f) | euc;
        *tb++ = (*p++ & 0x7f) | euc;
      }
      else if (!(*p & 0x80)) {      /* broken ? */
        if (!euc && cd->cset != CSET_ASCII) {
          if (te - tb < cd->solen)
            goto end;

          memcpy(tb, cd->ShiftOut, cd->solen);
          tb += cd->solen;
          cd->cset = CSET_ASCII;

          /* Same room check as in the ASCII branch above. */
          if (tb >= te)
            goto end;
        }

        *tb++ = *p++;
      }
      else
        ++p;                  /* broken */

      fb = p;
      state = JA_EUC_NOSTATE;
      break;
    case JA_EUC_SS2:
      if (0xa0 <= *p && *p <= 0xdf) {     /* JIS X 0201-Kana */
        if (!euc && cd->cset != CSET_X0208) {
          if (te - tb < cd->silen)
            goto end;

          memcpy(tb, cd->ShiftIn, cd->silen);
          tb += cd->silen;
          cd->cset = CSET_X0208;
        }

        if (te - tb < 2)
          goto end;

        *tb++ = han2zen_tab[*p - 0xa0][0] | euc;
        *tb++ = han2zen_tab[*p - 0xa0][1] | euc;
      }

      fb = ++p;
      state = JA_EUC_NOSTATE;
      break;
    case JA_EUC_SS3:
      /* First byte after SS3: skipped. */
      ++p;
      state = (JA_EUC_SS3 | JA_EUC_MBYTE1);
      break;
    case JA_EUC_SS3 | JA_EUC_MBYTE1:
      /* Second byte after SS3: skipped, character fully consumed. */
      fb = ++p;
      state = JA_EUC_NOSTATE;
      break;
    }
end:
  /* Publish how far output and input advanced. */
  res = tb - (unsigned char *)*to;
  *to = (char *)tb;
  *to_left -= res;
  *from_left = fe - fb;
  *from = (const char *)fb;
  return res;
}

/*
 * Convert EUC-JP to Shift-JIS
 */

/*
 * Store the Shift-JIS form of a double-byte character.
 *
 *   ub, lb  the two 7-bit bytes of the character (0x21..0x7e each for
 *           valid input)
 *   s       receives the two Shift-JIS bytes in s[0] and s[1]
 */
void
put_sjis(uchar ub, uchar lb, char *s)
{
    uchar row = (uchar)(ub - 0x20);
    uchar col = (uchar)(lb - 0x20);
    uchar lead, trail;

    /* Even rows occupy the upper half of the second-byte range. */
    if (row & 1)
      trail = (uchar)(col + 0x3f);
    else
      trail = (uchar)(col + 94 + 0x3f);

    /* Two rows share one first byte. */
    lead = (uchar)(((row - 1) >> 1) + 0x81);

    if (lead > 0x9f)
      lead = (uchar)(lead + 0x40);    /* jump over 0xa0..0xdf */
    if (trail > 0x7e)
      trail = (uchar)(trail + 1);     /* 0x7f is never used */

    s[0] = lead;
    s[1] = trail;
}

/*
 * cConvES -- convert EUC-JP bytes to Shift-JIS.
 *
 *   cd         conversion state (not used by this routine)
 *   from       in/out: input cursor
 *   from_left  in/out: bytes of input remaining
 *   to         in/out: output cursor
 *   to_left    in/out: bytes of output space remaining
 *
 * Returns the number of bytes written during this call.
 *
 * `p' is the read position, `fb' the position up to which input has been
 * fully handled.  Only `fb' is reported back, so a character that is
 * incomplete or did not fit is offered again on the next call.
 *
 * Half-width kana after SS2 are replaced through han2zen_tab; the two
 * bytes following SS3 are skipped, as in cConvEJ.
 */
int
cConvES(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left)
{
  const uchar *fb, *fe, *p;
  uchar *tb, *te, ub = '\0';
  int state = JA_EUC_NOSTATE, res;

  p = fb = (const uchar *)*from;
  fe = &fb[*from_left];
  tb = (uchar *)*to;
  te = &tb[*to_left];

  while (p < fe && tb < te)
    switch (state) {
    case JA_EUC_NOSTATE:
      if (!(*p & 0x80)) {     /* ASCII */
        *tb++ = *p++;
        fb = p;
      }
      else if (0xa1 <= *p && *p <= 0xfe) {      /* JIS X 0208, 0213-1 */
        ub = *p++;
        state = JA_EUC_MBYTE1;
      }
      else if (*p == EUC_SS2_CODE) {      /* SS2 + JIS X 0201-Kana */
        state = JA_EUC_SS2;
        ++p;
      }
      else if (*p == EUC_SS3_CODE) {      /* SS3 + JIS X 0212, 0213-2 */
        state = JA_EUC_SS3;
        ++p;
      }
      else
        fb = ++p;             /* broken */

      break;
    case JA_EUC_MBYTE1:
      if (0xa1 <= *p && *p <= 0xfe) {     /* JIS X 0208, 0213-1 */
        if (te - tb < 2)
          goto end;

        put_sjis(ub & 0x7f, *p & 0x7f, (char *)tb);
        tb += 2;
      }
      else if (!(*p & 0x80))  /* broken ? */
        *tb++ = *p;

      fb = ++p;
      state = JA_EUC_NOSTATE;
      break;
    case JA_EUC_SS2:
      if (0xa0 <= *p && *p <= 0xdf) {     /* JIS X 0201-Kana */
        if (te - tb < 2)
          goto end;

        put_sjis(han2zen_tab[*p - 0xa0][0],
                 han2zen_tab[*p - 0xa0][1],
                 (char *)tb);
        /* Keep the two bytes just stored; without this they were lost. */
        tb += 2;
      }

      fb = ++p;
      state = JA_EUC_NOSTATE;
      break;
    case JA_EUC_SS3:
      /*
       * First byte after SS3: skip it.  Without the advance the same
       * byte was counted twice and the real second byte was then read
       * as the start of a new character.
       */
      ++p;
      state = (JA_EUC_SS3 | JA_EUC_MBYTE1);
      break;
    case JA_EUC_SS3 | JA_EUC_MBYTE1:
      /* Second byte after SS3: skipped, character fully consumed. */
      state = JA_EUC_NOSTATE;
      fb = ++p;
      break;
    }
end:
  /* Publish how far output and input advanced. */
  res = tb - (unsigned char *)*to;
  *to = (char *)tb;
  *to_left -= res;
  *from_left = fe - fb;
  *from = (const char *)fb;
  return res;
}

/*
 * JA_CES_find1 -- feed one input byte to the charset detectors in *st.
 *
 * Three detectors run side by side on st->iso (7-bit JIS escape
 * sequences), st->euc and st->sjis.  Each status value carries a parser
 * state in the high nibble and a result code in the low nibble (see
 * CODE_STATE() and the *_STATE() macros).  A detector whose status has
 * become JA_CODE_ERROR is no longer updated.
 *
 * The ISO detector additionally records the final bytes of the escape
 * sequences it saw in st->si / st->so, sets st->iso_kana when it sees
 * ESC ( I, and stops scanning once both st->si and st->so are filled in.
 */
void
JA_CES_find1(const uchar *p, JA_CES_stat_t *st)
{
  if (st->iso != JA_CODE_ERROR && (st->si == '\0' || st->so == '\0')) {
    switch (ISO_STATE(st->iso)) {
    case JA_ISO_NOSTATE:
      if (*p == ESC_CODE)     /* ESC sequence */
      st->iso = (CODE_STATE(st->iso) | JA_ISO_ESC);
      break;
    case JA_ISO_ESC:
      if (*p == '(')    /* ESC ( F */
      st->iso = (CODE_STATE(st->iso) | JA_ISO_CS94);
      else if (*p == '$')     /* ESC $ F, ESC $ ( F */
      st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS);
      else
      st->iso = (CODE_STATE(st->iso) | JA_ISO_NOSTATE);
      break;
    case JA_ISO_CS94:
      if (*p == 'B' || *p == 'J' || *p == 'H')
      st->so = *p;
      else if (*p == 'I')
      st->iso_kana = JA_CODE_OK;
      st->iso = (CODE_STATE(st->iso) | JA_ISO_NOSTATE);
      break;
    case JA_ISO_MBCS:
      if (*p == '(') {  /* ESC $ ( F */
      st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_CS94);
      break;
      }
      /* fall through: ESC $ F shares the final-byte handling */
    case JA_ISO_MBCS | JA_ISO_CS94:
      if (*p == 'B' || *p == '@')
      st->si = *p;
      st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_MBYTE1);
      break;
    /* Inside double-byte text: alternate between the two byte positions. */
    case JA_ISO_MBCS | JA_ISO_MBYTE1:
      if (*p == ESC_CODE)     /* ESC sequence */
      st->iso = (CODE_STATE(st->iso) | JA_ISO_ESC);
      else
      st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_CS94 | JA_ISO_MBYTE1);
      break;
    case JA_ISO_MBCS | JA_ISO_CS94 | JA_ISO_MBYTE1:
      st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_MBYTE1);
      break;
    }

    /* Any byte with bit 7 set rules out a 7-bit encoding. */
    if (*p & 0x80)
      st->iso = JA_CODE_ERROR;
  }

  if (st->euc != JA_CODE_ERROR) {
    switch (EUC_STATE(st->euc)) {
    case JA_EUC_NOSTATE:
      if (!(*p & 0x80)) /* ASCII */
      ;
      else if (0xa1 <= *p && *p <= 0xfe)  /* JIS X 0208, 0213-1 */
      st->euc = (CODE_STATE(st->euc) | JA_EUC_MBYTE1);
      else if (*p == EUC_SS2_CODE)  /* SS2 + JIS X 0201-Kana */
      st->euc = (CODE_STATE(st->euc) | JA_EUC_SS2);
      else if (*p == EUC_SS3_CODE)  /* SS3 + JIS X 0212, 0213-2 */
      st->euc = (CODE_STATE(st->euc) | JA_EUC_SS3);
      else
      st->euc = JA_CODE_ERROR;
      break;
    case JA_EUC_MBYTE1:
      /* A first double-byte lead was seen: upgrade NORMAL to OK. */
      if (CODE_STATE(st->euc) == JA_CODE_NORMAL)
      st->euc = JA_CODE_OK;
      /* fall through: validate the second byte */
    case JA_EUC_SS3 | JA_EUC_MBYTE1:
      if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0208, 0213-1 */
      st->euc = (CODE_STATE(st->euc) | JA_EUC_NOSTATE);
      else if (st->euc & JA_CODE_BROKEN)
      st->euc = JA_CODE_ERROR;      /* second bad sequence: give up */
      else
      st->euc = (JA_CODE_BROKEN | JA_EUC_NOSTATE);
      break;
    case JA_EUC_SS2:
      if (0xa0 <= *p && *p <= 0xdf) /* JIS X 0201-Kana */
      st->euc = (CODE_STATE(st->euc) | JA_EUC_NOSTATE);
      else
      st->euc = JA_CODE_ERROR;
      break;
    case JA_EUC_SS3:
      if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0212, 0213-2 */
      st->euc = (CODE_STATE(st->euc) | JA_EUC_SS3 | JA_EUC_MBYTE1);
      else
      st->euc = JA_CODE_ERROR;
      break;
    }
  }

  if (st->sjis != JA_CODE_ERROR) {
    switch (SJIS_STATE(st->sjis)) {
    case JA_SJIS_NOSTATE:
      if (!(*p & 0x80)) /* ASCII */
      ;
      else if (0x81 <= *p && *p <= 0x9f)
      st->sjis = (CODE_STATE(st->sjis) | JA_SJIS_SHIFT_L);
      else if (0xe0 <= *p && *p <= 0xef)  /* JIS X 0208 */
      /* else if (0xe0 <= *p && *p <= 0xfc) */
      /* JIS X 0213 */
      st->sjis = (CODE_STATE(st->sjis) | JA_SJIS_SHIFT_H);
      else if (0xa0 == *p)
      st->sjis = (JA_CODE_BROKEN | JA_SJIS_NOSTATE);
      else if (0xa1 <= *p && *p <= 0xdf)  /* JIS X 0201-Kana */
      st->sjis_kana = JA_CODE_OK;
      else
      st->sjis = JA_CODE_ERROR;
      break;
    case JA_SJIS_SHIFT_L:
    case JA_SJIS_SHIFT_H:
      /* A first double-byte lead was seen: upgrade NORMAL to OK. */
      if (CODE_STATE(st->sjis) == JA_CODE_NORMAL)
      st->sjis = JA_CODE_OK;
      if ((0x40 <= *p && *p <= 0x7e) ||
        (0x80 <= *p && *p <= 0xfc))       /* JIS X 0208, 0213 */
      st->sjis = (CODE_STATE(st->sjis) | JA_SJIS_NOSTATE);
      else if (st->sjis & JA_CODE_BROKEN)
      st->sjis = JA_CODE_ERROR;     /* second bad sequence: give up */
      else
      st->sjis = (JA_CODE_BROKEN | JA_SJIS_NOSTATE);
      break;
    }
  }
}

/*
 * Run JA_CES_find1() over the `len' bytes at `p', updating *st.
 * Scanning stops early as soon as either the EUC or the Shift-JIS
 * detector reports JA_CODE_ERROR.
 */
void
JA_CES_find(const uchar *p, int len, JA_CES_stat_t *st)
{
  const uchar *stop = &p[len];

  while (p < stop) {
    JA_CES_find1(p, st);

    if (st->euc == JA_CODE_ERROR || st->sjis == JA_CODE_ERROR)
      return;

    ++p;
  }
}

/*
 * Pick the 7-bit JIS variant that matches the escape-sequence final
 * bytes recorded in st->si (double-byte designation) and st->so
 * (single-byte designation).
 *
 *   si '@':    so 'H' -> CODE_JIS_J, 'J' -> CODE_JIS_j, else CODE_JIS_m
 *   si 'B':    so 'J' -> CODE_JIS_N, else CODE_JIS_n
 *   otherwise: so 'H' -> CODE_JIS_J, 'J' -> CODE_JIS_N, else CODE_JIS_n
 */
char
JA_CES_examin_siso(JA_CES_stat_t *st)
{
  if (st->si == '@') {
    if (st->so == 'H')
      return CODE_JIS_J;
    if (st->so == 'J')
      return CODE_JIS_j;
    return CODE_JIS_m;
  }

  if (st->si == 'B')
    return (st->so == 'J') ? CODE_JIS_N : CODE_JIS_n;

  /* No (known) double-byte designation seen: decide on `so' alone. */
  if (st->so == 'H')
    return CODE_JIS_J;
  if (st->so == 'J')
    return CODE_JIS_N;
  return CODE_JIS_n;
}

/*
 * Turn the detector results in *st into a charset code.
 *
 * While the ISO detector has not failed, the answer is a 7-bit JIS
 * variant, or '\0' when no escape sequence was seen at all.  Otherwise
 * the hinted charset wins if its detector has not failed, then any
 * detector whose result code is JA_CODE_OK (EUC first).  CODE_UNKNOWN is
 * returned when nothing matches.
 */
char
JA_CES_examine_stat(JA_CES_stat_t *st)
{
  if (st->iso != JA_CODE_ERROR) {
    int saw_escape = (st->si != '\0' || st->so != '\0' || st->iso_kana == JA_CODE_OK);

    return saw_escape ? JA_CES_examin_siso(st) : '\0';
  }

  /* Trust the caller's hint as long as that detector is still alive. */
  if (st->hint == CODE_EUC) {
    if (st->euc != JA_CODE_ERROR)
      return CODE_EUC;
  }
  else if (st->hint == CODE_SJIS && st->sjis != JA_CODE_ERROR)
    return CODE_SJIS;

  if (CODE_STATE(st->euc) == JA_CODE_OK)
    return CODE_EUC;

  return (CODE_STATE(st->sjis) == JA_CODE_OK) ? CODE_SJIS : CODE_UNKNOWN;
}

/*
 * Guess the charset of `buf', using `hint' as the preferred answer.
 *
 * Returns '\0' at once when the hint is CODE_INNER_EUC.  Otherwise the
 * buffer is scanned and the result of JA_CES_examine_stat() is returned
 * (and stored in st.found) unless it is CODE_UNKNOWN.  In that case
 * CODE_SJIS is returned only when the Shift-JIS detector is still
 * JA_CODE_NORMAL and the EUC detector is not; everything else falls
 * back to CODE_EUC.
 */
char
checkShiftCode(Str buf, uchar hint)
{
    JA_CES_stat_t st;

    if (hint == CODE_INNER_EUC)
      return '\0';

    JA_CES_stat_init(&st, hint);
    JA_CES_find((const unsigned char *)buf->ptr, buf->length, &st);

    st.found = JA_CES_examine_stat(&st);
    if (st.found != CODE_UNKNOWN)
      return st.found;

    if (CODE_STATE(st.euc) != JA_CODE_NORMAL &&
        CODE_STATE(st.sjis) == JA_CODE_NORMAL)
      return CODE_SJIS;

    return CODE_EUC;
}
#elif defined(MANY_CHARSET)

#include "Str.h"

/* Charset detector built by conv_init_r(); NULL when none is configured. */
mb_cs_detector_t *conv_cs_detector = NULL;
/* Setup used when reading (changed by conv_init_r / conv_setup_r). */
mb_setup_t conv_mb_setup_r = {};
/* Setup used when writing (changed by conv_init_w / conv_setup_w). */
mb_setup_t conv_mb_setup_w = {};

/* Converter lists applied to wide characters on input and on output. */
mb_ws_conv_t *input_converters = NULL;
mb_ws_conv_t *output_converters = NULL;

/*
 * Initialise the reader side: build conv_cs_detector for language
 * `lang' and pass `op' plus the variadic arguments to mb_vsetsetup()
 * for conv_mb_setup_r.
 *
 * When `ics' names a charset known to mb_ces_by_name(), that charset is
 * placed first in the detector's candidate list, followed by the
 * language's own candidates (without a duplicate of the first one and
 * capped at MB_CS_DETECT_CHOICEMAX).  Without a usable `lang' the
 * detector is set to NULL.
 */
void
conv_init_r(const char *lang, const char *ics, const char *op, ...)
{
  mb_cs_detector_stat_t statv[MB_CS_DETECT_CHOICEMAX];
  size_t nstats;
  va_list ap;
  int have_preferred = 0;

  va_start(ap, op);

  if (!lang || !mb_lang_to_detector(lang, statv, &nstats))
    conv_cs_detector = NULL;
  else {
    conv_cs_detector = New(mb_cs_detector_t);
    bzero(conv_cs_detector, sizeof(*conv_cs_detector));

    if (ics) {
      mb_info_t dummy = {};

      mb_ces_by_name(ics, &dummy);

      if (!(dummy.flag & MB_FLAG_UNKNOWNCS)) {
        mb_ces_t *ces = dummy.ces;
        size_t i = 1, j;

        /* Preferred charset first, then the rest of the candidates. */
        conv_cs_detector->stat[0].ces = ces;

        for (j = 0 ; j < nstats && i < MB_CS_DETECT_CHOICEMAX ; ++j)
          if (statv[j].ces != ces)
            conv_cs_detector->stat[i++].ces = statv[j].ces;

        conv_cs_detector->nstats = i;
        have_preferred = 1;
      }
    }

    if (!have_preferred) {
      /* No usable preference: take the language's list as is. */
      memcpy(conv_cs_detector->stat, statv, sizeof(mb_cs_detector_stat_t) * nstats);
      conv_cs_detector->nstats = nstats;
    }
  }

  mb_vsetsetup(&conv_mb_setup_r, op, ap);
  va_end(ap);
}

/*
 * Pass `op' and the variadic arguments to mb_vsetsetup() for the reader
 * setup conv_mb_setup_r.
 */
void
conv_setup_r(const char *op, ...)
{
  va_list args;

  va_start(args, op);
  mb_vsetsetup(&conv_mb_setup_r, op, args);
  va_end(args);
}

/*
 * Initialise the writer side: pass `op' and the variadic arguments to
 * mb_vsetsetup() for conv_mb_setup_w.
 */
void
conv_init_w(const char *op, ...)
{
  va_list args;

  va_start(args, op);
  mb_vsetsetup(&conv_mb_setup_w, op, args);
  va_end(args);
}

/*
 * Pass `op' and the variadic arguments to mb_vsetsetup() for the writer
 * setup conv_mb_setup_w.
 */
void
conv_setup_w(const char *op, ...)
{
  va_list args;

  va_start(args, op);
  mb_vsetsetup(&conv_mb_setup_w, op, args);
  va_end(args);
}

/*
 * Encode the wide characters [wb, we) with mb_wchar_to_mbc() and append
 * the bytes to the result string `d'.
 *
 * `d' may be NULL, meaning "nothing has differed from the source so
 * far".  In that case the encoded bytes are compared with the source
 * chunk [b, e): if identical, NULL is returned again; otherwise a new
 * Str is started with the untouched source prefix [s, b) followed by
 * the encoded bytes.
 */
static Str
conv_apply_convv_cat(Str d, mb_wchar_t *wb, mb_wchar_t *we, char *b, char *e, char *s)
{
  char mbs[BUFSIZ];
  char *tail = mbs;
  int nbytes;

  while (wb < we) {
    tail += mb_wchar_to_mbc(*wb, tail);
    ++wb;
  }

  nbytes = tail - mbs;

  if (d) {
    Strcat_charp_n(d, mbs, nbytes);
    return d;
  }

  /* Still identical to the source: keep reporting "unchanged". */
  if (nbytes == e - b && !memcmp(mbs, b, nbytes))
    return d;

  d = Strnew_size(b - s + nbytes);
  Strcat_charp_n(d, s, b - s);
  Strcat_charp_n(d, mbs, nbytes);
  return d;
}

/*
 * Apply the converter list `cv' (input_converters when NULL) to the
 * *p_n bytes at `s', working through the text in chunks of wide
 * characters.
 *
 * Returns `s' itself when the re-encoded text is byte-identical to the
 * source.  Otherwise returns the buffer of a newly built Str and stores
 * its length in *p_n.
 */
char *
conv_apply_convv(char *s, int *p_n, mb_ws_conv_t *cv, mb_info_t *info)
{
  mb_wchar_t ws[BUFSIZ / MB_MBC_LEN_MAX], *ewp;
  Str d = NULL;
  char *cur = s;              /* next byte to decode */
  char *chunk = s;            /* start of the chunk being decoded */
  char *end = s + *p_n;

  if (!cv)
    cv = input_converters;

  while (cur < end) {
    ewp = ws;
    cur = (char *)mb_mem_to_wstr(cur, end, &ewp, ws + sizeof(ws) / sizeof(ws[0]));

    if (cv)
      mb_apply_convv(ws, ewp, cv, info);

    d = conv_apply_convv_cat(d, ws, ewp, chunk, cur, s);
    chunk = cur;
  }

  if (!d)
    return s;

  *p_n = d->length;
  return d->ptr;
}

/*
 * Encode the single wide character `wc' into `buf' with
 * mb_wchar_to_mbc(), after running it through input_converters (if any)
 * under a temporary copy of the reader setup.  Returns the value
 * returned by mb_wchar_to_mbc().
 */
size_t
conv_ucs2mb(mb_wchar_t wc, char *buf)
{
  mb_setup_t setup = conv_mb_setup_r;
  mb_info_t info;

  mb_setsetup(&setup, "@", "utf-8");
  mb_mem2mb_setup(&info, NULL, 0, &setup, "");

  if (input_converters) {
    mb_wchar_t *one_past = &wc + 1;

    mb_apply_convv(&wc, one_past, input_converters, &info);
  }

  return mb_wchar_to_mbc(wc, buf);
}

/*
 * Decode everything readable through `info' chunk by chunk, apply
 * input_converters, and collect the result.
 *
 * With s == NULL a new Str of capacity `n' receives the output.  With a
 * non-NULL `s', a new Str is built only if the converted bytes differ
 * from the source.  Otherwise `s' itself is returned when at least one
 * character was decoded or `s' is empty, and a fresh empty Str when
 * nothing could be decoded from a non-empty `s'.
 */
static Str
conv_info2mbStr(mb_info_t *info, size_t n, Str s)
{
  mb_wchar_t wc, ws[BUFSIZ / MB_MBC_LEN_MAX];
  Str d = NULL;
  void *ewp;
  size_t start, decoded = 0;

  if (!s)
    d = Strnew_size(n);

  for (;;) {
    start = info->b;
    ewp = ws;
    wc = mb_cs_detect_encode(info,
                       MB_ENCODE_TO_WS | MB_ENCODE_SKIP_INVALID | MB_ENCODE_SKIP_SHORT,
                       &ewp, ws + sizeof(ws) / sizeof(ws[0]));

    if ((mb_wchar_t *)ewp > ws) {
      decoded += (mb_wchar_t *)ewp - ws;

      if (input_converters)
        mb_apply_convv(ws, ewp, input_converters, info);

      d = conv_apply_convv_cat(d, ws, ewp, &info->buf[start], &info->buf[info->b], info->buf);
    }

    if (wc == mb_notchar_eof)
      break;
  }

  if (d)
    return d;

  if (decoded || !s->length)
    return s;

  return Strnew_size(0);
}

/*
 * Convert the `n' bytes at `s' to the internal multibyte form
 * (va_list flavour).
 *
 *   S     source Str wrapping the same bytes, or NULL; handed on to
 *         conv_info2mbStr()
 *   p_cs  optional in/out charset name: a non-NULL *p_cs is used when
 *         the options in `op'/`ap' do not set a charset; an empty *p_cs
 *         receives the name of the charset finally used
 *
 * The charset comes from the options, then *p_cs, then the reader
 * default.  Returns a fresh empty Str when `s' is NULL or `n' is 0.
 */
Str
conv_vmem2mbStr(const char *s, size_t n, Str S, const char **p_cs, const char *op, va_list ap)
{
  mb_info_t info;
  mb_setup_t setup;

  if (!s || !n)
    return Strnew();

  setup = conv_mb_setup_r;
  setup.cs = NULL;
  mb_vsetsetup(&setup, op, ap);

  if (!setup.cs && p_cs && *p_cs)
    setup.cs = *p_cs;
  if (!setup.cs)
    setup.cs = conv_mb_setup_r.cs;

  memset(&info, 0, sizeof(info));
  mb_mem2mb_setup(&info, s, n, &setup, "|", MB_FLAG_DONTFLUSH_BUFFER);

  if ((info.flag & MB_FLAG_UNKNOWNCS) && setup.cs && conv_mb_setup_r.cs)
    mb_ces_by_name(conv_mb_setup_r.cs, &info);        /* fall back to the default */
  else if (conv_cs_detector && !setup.cs)
    mb_bind_cs_detector(conv_cs_detector, &info);     /* no charset given: detect */

  S = conv_info2mbStr(&info, n, S);

  if (p_cs && !*p_cs)
    *p_cs = info.ces->namev[0];

  return S;
}

/*
 * Variadic front end for conv_vmem2mbStr() with no source Str
 * (S == NULL).
 */
Str
conv_mem2mbStr(const char *s, size_t n, const char **p_cs, const char *op, ...)
{
  va_list args;
  Str result;

  va_start(args, op);
  result = conv_vmem2mbStr(s, n, NULL, p_cs, op, args);
  va_end(args);

  return result;
}

/*
 * NUL-terminated string flavour of conv_vmem2mbStr(); a NULL `s' is
 * passed on with length 0.
 */
Str
conv_vstr2mbStr(const char *s, const char **p_cs, const char *op, va_list ap)
{
  size_t len = 0;

  if (s)
    len = strlen(s);

  return conv_vmem2mbStr(s, len, NULL, p_cs, op, ap);
}

/*
 * Variadic front end for conv_vstr2mbStr().
 */
Str
conv_str2mbStr(const char *s, const char **p_cs, const char *op, ...)
{
  va_list args;
  Str result;

  va_start(args, op);
  result = conv_vstr2mbStr(s, p_cs, op, args);
  va_end(args);

  return result;
}

/*
 * Str flavour of conv_vmem2mbStr(): converts the contents of `s' and
 * passes `s' itself along as the source Str.
 */
Str
conv_Str2mbStr(Str s, const char **p_cs, const char *op, ...)
{
  va_list args;
  Str result;

  va_start(args, op);
  result = conv_vmem2mbStr(s->ptr, s->length, s, p_cs, op, args);
  va_end(args);

  return result;
}

size_t
conv_Str_write(const char *s, size_t n, void *ap)
{
  conv_Str_write_t *p = ap;

  p->d->length = p->info.e;
  Strassure(p->d, 1);
  p->info.buf = p->d->ptr;
  p->info.size = p->d->area_size;
  return 0;
}

/*
 * Fallback decoder.  For a wide character inside the
 * MB_WORD_ENC(mb_SBC, MB_CTL_FC, ...) range, stores its offset within
 * that range, with bit 7 set, as one octet and returns 1.  Returns 0
 * without storing anything for every other value.
 */
size_t
conv_default_decoder(mb_wchar_t enc, mb_info_t *info)
{
  if (enc < MB_WORD_ENC(mb_SBC, MB_CTL_FC, 0) || enc > MB_WORD_ENC(mb_SBC, MB_CTL_FC, MB_SBC_UNIT - 1U))
    return 0;

  mb_store_octet((enc - MB_WORD_ENC(mb_SBC, MB_CTL_FC, 0)) | 0x80, info);
  return 1;
}

/*
 * Convert the `n' bytes at `s' from the internal multibyte form to the
 * output encoding configured in conv_mb_setup_w plus `op'/`ap'
 * (va_list flavour).  Returns a new NUL-terminated Str, or a fresh
 * empty Str when `s' is NULL or `n' is 0.
 *
 * The input is decoded into wide characters in batches of up to BUFSIZ;
 * each batch goes through output_converters and mb_decode().
 */
Str
conv_vmem2isoStr(const char *s, size_t n, const char *op, va_list ap)
{
  if (s && n) {
    conv_Str_write_t arg = {};
    mb_wchar_t ws[BUFSIZ], *ewp;
    size_t i;
    int cn;

    arg.d = Strnew_size(n);
    mb_vinit_w(&arg.info, &arg, conv_Str_write, &conv_mb_setup_w, op, ap);
    /* Output accumulates directly in the Str's buffer. */
    arg.info.flag |= MB_FLAG_DONTFLUSH_BUFFER;
    arg.info.buf = arg.d->ptr;
    arg.info.size = arg.d->area_size;


    for (ewp = ws, i = 0 ; i < n ;) {
      /*
       * NOTE(review): mb_mem_to_wchar_internal presumably stores the
       * decoded character in its third argument and yields the number
       * of bytes used -- confirm.  When it yields <= 0 one input byte
       * is skipped, yet the slot *ewp is still counted below.
       */
      if ((cn = mb_mem_to_wchar_internal(&s[i], n - i, *ewp)) > 0)
      i += cn;
      else
      ++i;

      /* Batch full: convert and emit it, then start over. */
      if (++ewp >= ws + sizeof(ws) / sizeof(ws[0])) {
      mb_apply_convv(ws, ewp, output_converters, &arg.info);
      mb_decode(ws, ewp, &arg.info);
      ewp = ws;
      }
    }

    /* Emit the last, partially filled batch. */
    if (ewp > ws) {
      mb_apply_convv(ws, ewp, output_converters, &arg.info);
      mb_decode(ws, ewp, &arg.info);
    }

    mb_store_char_noconv(EOF, &arg.info);
    arg.d->length = arg.info.e;
    Strassure(arg.d, 1);
    arg.d->ptr[arg.d->length] = '\0';
    return arg.d;
  }
  else
    return Strnew();
}

/*
 * Variadic front end for conv_vmem2isoStr().
 */
Str
conv_mem2isoStr(const char *s, size_t n, const char *op, ...)
{
  va_list args;
  Str result;

  va_start(args, op);
  result = conv_vmem2isoStr(s, n, op, args);
  va_end(args);

  return result;
}

/*
 * NUL-terminated string flavour of conv_vmem2isoStr(); a NULL `s' is
 * passed on with length 0.
 */
Str
conv_vstr2isoStr(const char *s, const char *op, va_list ap)
{
  size_t len = 0;

  if (s)
    len = strlen(s);

  return conv_vmem2isoStr(s, len, op, ap);
}

/*
 * Variadic front end for conv_vstr2isoStr().
 */
Str
conv_str2isoStr(const char *s, const char *op, ...)
{
  va_list args;
  Str result;

  va_start(args, op);
  result = conv_vstr2isoStr(s, op, args);
  va_end(args);

  return result;
}

#endif                        /* JP_CHARSET */

/* Listing generated by Doxygen 1.6.0. */