#include <stdio.h> #include <string.h> #include "fm.h" #ifdef JP_CHARSET #include "terms.h" #include "Str.h" #ifdef DEBUG #include <malloc.h> #endif /* DEBUG */ #define uchar unsigned char #define ushort unsigned short #define uint unsigned int #ifdef TRUE #undef TRUE #endif #ifdef FALSE #undef FALSE #endif #define TRUE 1 #define FALSE 0 #ifdef ESC_CODE #undef ESC_CODE #endif #define ESC_CODE '\033' #define CODE_STATE(c) ((c) & 0x0f) #define EUC_STATE(c) ((c) & 0xf0) #define SJIS_STATE(c) ((c) & 0xf0) #define ISO_STATE(c) ((c) & 0xf0) #define CSET_ASCII 0 #define CSET_X0208 1 #define CSET_X0201K 2 #define CSET_UNKNOWN 3 #define JSIcode "\033$@" #define JSOcode "\033(H" #define J2SIcode "\033$@" #define J2SOcode "\033(J" #define NSIcode "\033$B" #define NSOcode "\033(J" #define N2SIcode "\033$B" #define N2SOcode "\033(B" #define N3SIcode "\033$@" #define N3SOcode "\033(B" #define USIcode "\033$" #define USOcode "\033+" #define SIOcode_LEN_MAX (3) static int cConvEJ(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left); static int cConvES(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left); static int cConvSE(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left); static int cConvJE(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left); char checkShiftCode(Str buf, uchar hint); static char *han2zen_tab[] = { "!!", "!#", "!V", "!W", "!\"", "!&", "%r", "%!", "%#", "%%", "%'", "%)", "%c", "%e", "%g", "%C", "!<", "%\"", "%$", "%&", "%(", "%*", "%+", "%-", "%/", "%1", "%3", "%5", "%7", "%9", "%;", "%=", "%?", "%A", "%D", "%F", "%H", "%J", "%K", "%L", "%M", "%N", "%O", "%R", "%U", "%X", "%[", "%^", "%_", "%`", "%a", "%b", "%d", "%f", "%h", "%i", "%j", "%k", "%l", "%m", "%o", "%s", "!+", "!,", }; typedef struct _ConvRoutine { char key; int (*from)(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left); int (*to)(JA_CES_conv_t *, const char **from, int *from_left, char **to, int *to_left); JA_CES_conv_t cd; } ConvRoutine; static char ConvRoutineKeyMap[1 << CHAR_BIT] = { '\0', }; static ConvRoutine FromToEJ[] = { {CODE_JIS_J, cConvEJ, cConvJE, {CSET_ASCII, sizeof(JSIcode) - 1, sizeof(JSOcode) - 1, JSIcode, JSOcode}}, {CODE_JIS_N, cConvEJ, cConvJE, {CSET_ASCII, sizeof(NSIcode) - 1, sizeof(NSOcode) - 1, NSIcode, NSOcode}}, {CODE_JIS_n, cConvEJ, cConvJE, {CSET_ASCII, sizeof(N2SIcode) - 1, sizeof(N2SOcode) - 1, N2SIcode, N2SOcode}}, {CODE_JIS_m, cConvEJ, cConvJE, {CSET_ASCII, sizeof(N3SIcode) - 1, sizeof(N3SOcode) - 1, N3SIcode, N3SOcode}}, {CODE_JIS_j, cConvEJ, cConvJE, {CSET_ASCII, sizeof(J2SIcode) - 1, sizeof(J2SOcode) - 1, J2SIcode, J2SOcode}}, {CODE_SJIS, cConvES, cConvSE, {CSET_ASCII, 0, 0, "", ""}}, {CODE_EUC, cConvEJ, cConvEJ, {CSET_ASCII, 0, 0, "", ""}}, {'\0', NULL, NULL, {'\0', 0, 0, NULL, NULL}} }; int getConvToEJRoutine(char key, int (**p_routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left), JA_CES_conv_t *cd) { int i; if (!ConvRoutineKeyMap[CODE_EUC]) for (i = 1 ; FromToEJ[i].key ; ++i) ConvRoutineKeyMap[(int)FromToEJ[i].key] = i; if ((i = ConvRoutineKeyMap[(int)(unsigned char)key])) { *p_routine = FromToEJ[i].to; *cd = FromToEJ[i].cd; } return i; } int getConvFromEJRoutine(char key, int (**p_routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left), JA_CES_conv_t *cd) { int i; if (!ConvRoutineKeyMap[CODE_EUC]) for (i = 1 ; FromToEJ[i].key ; ++i) ConvRoutineKeyMap[(int)FromToEJ[i].key] = i; if ((i = ConvRoutineKeyMap[(int)(unsigned char)key])) { *p_routine = FromToEJ[i].from; *cd = FromToEJ[i].cd; } return i; } const char * GetSICode(char key) { int (*routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left); JA_CES_conv_t cd; if (getConvFromEJRoutine(key, &routine, &cd)) return cd.ShiftIn; else return ""; } const char * GetSOCode(char key) { int (*routine)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left); JA_CES_conv_t cd; if (getConvFromEJRoutine(key, &routine, &cd)) return cd.ShiftOut; else return ""; } static void n_impr(char s) { fprintf(stderr, "conv: option %c(0x%02x) is not implemented yet... sorry\n", s, s); w3m_exit(1); } static Str conv_apply(Str is, int (*conv_func)(JA_CES_conv_t *, const char **, int *, char **, int *), JA_CES_conv_t *cd, int ascii_on_eof) { Str os; const char *from; char *to; int from_left, to_left; from = (const char *)is->ptr; from_left = is->length; os = Strnew_size(from_left); to = os->ptr; to_left = os->area_size - 1; while (from_left) while (!conv_func(cd, &from, &from_left, &to, &to_left)) { os->length = to - os->ptr; Strassure(os, SIOcode_LEN_MAX); to = &os->ptr[os->length]; to_left = os->area_size - 1 - os->length; } if (ascii_on_eof && cd->solen && cd->cset != CSET_ASCII) { os->length = to - os->ptr; Strassure(os, cd->solen); memcpy(&os->ptr[os->length], cd->ShiftOut, cd->solen); to = &os->ptr[os->length + cd->solen]; } *to = '\0'; os->length = to - os->ptr; return os; } Str conv_str(Str is, char fc, char tc) { Str os; int (*conv_func)(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left); JA_CES_conv_t cd; if (fc == tc || fc == CODE_ASCII || tc == CODE_ASCII) return is; if (fc == CODE_INNER_EUC) os = is; else { if (!getConvToEJRoutine(fc, &conv_func, &cd)) { n_impr(fc); return NULL; } os = conv_apply(is, conv_func, &cd, 0); } if (tc == CODE_INNER_EUC || tc == CODE_EUC) return os; else { if (!getConvFromEJRoutine(tc, &conv_func, &cd)) { n_impr(tc); return NULL; } return conv_apply(os, conv_func, &cd, 1); } } Str conv(char *is, char fc, char tc) { return conv_str(Strnew_charp(is), fc, tc); } /* * Convert Shift-JIS to EUC-JP */ static uchar getSLb(const uchar *ptr, uchar *ub) { /* Get Shift-JIS Lower byte */ uchar c = *ptr; *ub <<= 1; if (c < 0x9f) { if (c > 0x7e) c--; *ub -= 1; c -= 0x3f; } else { c -= 0x9e; } return c; } int cConvSE(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left) { const uchar *fb, *fe, *p; uchar *tb, *te, ub = '\0'; int state = JA_SJIS_NOSTATE, res; p = fb = (const uchar *)*from; fe = &fb[*from_left]; tb = (uchar *)*to; te = &tb[*to_left]; while (p < fe && tb < te) switch (state) { case JA_SJIS_NOSTATE: if (!(*p & 0x80)) { /* ASCII */ *tb++ = *p++; fb = p; } else if (0x81 <= *p && *p <= 0x9f) { /* JIS X 0208, 0213 */ ub = *p++ & 0x7f; state = JA_SJIS_SHIFT_L; } else if (0xe0 <= *p && *p <= 0xef /* JIS X 0208 */ /* *p <= 0xfc */ /* JIS X 0213 */ ) { ub = (*p++ & 0x7f) - 0x40; state = JA_SJIS_SHIFT_H; } else if (0xa0 <= *p && *p <= 0xdf) { /* JIS X 0201-Kana */ if (te - tb < 2) goto end; *tb++ = han2zen_tab[*p - 0xa0][0] | 0x80; *tb++ = han2zen_tab[*p++ - 0xa0][1] | 0x80; fb = p; } else fb = ++p; /* broken */ break; case JA_SJIS_SHIFT_L: case JA_SJIS_SHIFT_H: if ((0x40 <= *p && *p <= 0x7e) || (0x80 <= *p && *p <= 0xfc)) { /* JIS X 0208, 0213 */ uchar lb; if (te - tb < 2) goto end; lb = getSLb(p++, &ub); *tb++ = (ub + 0x20) | 0x80; *tb++ = (lb + 0x20) | 0x80; } else if (!(*p & 0x80)) /* broken ? */ *tb++ = *p++; else ++p; /* broken */ fb = p; state = JA_SJIS_NOSTATE; break; } end: res = tb - (unsigned char *)*to; *to = (char *)tb; *to_left -= res; *from_left = fe - fb; *from = (const char *)fb; return res; } /* * Convert ISO-2022-JP to EUC-JP */ int cConvJE(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left) { const uchar *fb, *fe, *p; uchar *tb, *te, ub = '\0'; int state = JA_ISO_NOSTATE, res; p = fb = (const uchar *)*from; fe = &fb[*from_left]; tb = (uchar *)*to; te = &tb[*to_left]; while (p < fe && tb < te) switch (state) { case JA_ISO_NOSTATE: if (*p == ESC_CODE) { /* ESC sequence */ state = JA_ISO_ESC; ++p; } else if (cd->cset == CSET_ASCII || *p < 0x21) { *tb++ = *p++; fb = p; } else if (cd->cset == CSET_X0208 && *p <= 0x7e) { /* JIS X 0208 */ ub = *p++; state = JA_ISO_MBYTE1; } else if (cd->cset == CSET_X0201K && *p <= 0x5f) { /* JIS X 0201-Kana */ if (te - tb < 2) goto end; *tb++ = han2zen_tab[*p - 0x20][0] | 0x80; *tb++ = han2zen_tab[*p++ - 0x20][1] | 0x80; fb = p; } else fb = ++p; /* broken */ break; case JA_ISO_MBYTE1: if (*p == ESC_CODE) { /* ESC sequence */ state = JA_ISO_ESC; ++p; } else if (0x21 <= *p && *p <= 0x7e) { /* JIS X 0208 */ if (te - tb < 2) goto end; *tb++ = ub | 0x80; *tb++ = *p++ | 0x80; fb = p; state = JA_ISO_NOSTATE; } else { *tb++ = *p++; fb = p; state = JA_ISO_NOSTATE; } break; case JA_ISO_ESC: if (*p == '(') { /* ESC ( F */ state = JA_ISO_CS94; ++p; } else if (*p == '$') { /* ESC $ F, ESC $ ( F */ state = JA_ISO_MBCS; ++p; } else { if (te - tb < 2) goto end; *tb++ = ESC_CODE; *tb++ = *p++; fb = p; state = JA_ISO_NOSTATE; } break; case JA_ISO_CS94: if (*p == 'B' || *p == 'J' || *p == 'H') cd->cset = CSET_ASCII; else if (*p == 'I') cd->cset = CSET_X0201K; else { if (te - tb < 3) goto end; *tb++ = ESC_CODE; *tb++ = '('; *tb++ = *p; } fb = ++p; state = JA_ISO_NOSTATE; break; case JA_ISO_MBCS: if (*p == '(') { /* ESC $ ( F */ state = JA_ISO_MBCS | JA_ISO_CS94; ++p; break; } case JA_ISO_MBCS | JA_ISO_CS94: if (*p == 'B' || *p == '@') cd->cset = CSET_X0208; else { if (te - tb < 3 + (state == (JA_ISO_MBCS | JA_ISO_CS94))) goto end; *te++ = ESC_CODE; *te++ = '$'; if (state == (JA_ISO_MBCS | JA_ISO_CS94)) *te++ = '('; *te++ = *p; } fb = ++p; state = JA_ISO_NOSTATE; break; } end: res = tb - (unsigned char *)*to; *to = (char *)tb; *to_left -= res; *from_left = fe - fb; *from = (const char *)fb; return res; } int cConvEJ(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left) { const uchar *fb, *fe, *p; uchar *tb, *te, ub = '\0'; int state = JA_EUC_NOSTATE, res, euc; p = fb = (const uchar *)*from; fe = &fb[*from_left]; tb = (uchar *)*to; te = &tb[*to_left]; euc = *cd->ShiftIn ? 0 : 0x80; while (p < fe && tb < te) switch (state) { case JA_EUC_NOSTATE: if (!(*p & 0x80)) { /* ASCII */ if (!euc && cd->cset != CSET_ASCII) { if (te - tb < cd->solen) goto end; memcpy(tb, cd->ShiftOut, cd->solen); tb += cd->solen; cd->cset = CSET_ASCII; } *tb++ = *p++; fb = p; } else if (0xa1 <= *p && *p <= 0xfe) { /* JIS X 0208, 0213-1 */ ub = *p++; state = JA_EUC_MBYTE1; } else if (*p == EUC_SS2_CODE) { /* SS2 + JIS X 0201-Kana */ state = JA_EUC_SS2; ++p; } else if (*p == EUC_SS3_CODE) { /* SS3 + JIS X 0212, 0213-2 */ state = JA_EUC_SS3; ++p; } else fb = ++p; /* broken */ break; case JA_EUC_MBYTE1: if (0xa1 <= *p && *p <= 0xfe) { /* JIS X 0208, 0213-1 */ if (!euc && cd->cset != CSET_X0208) { if (te - tb < cd->silen) goto end; memcpy(tb, cd->ShiftIn, cd->silen); tb += cd->silen; cd->cset = CSET_X0208; } if (te - tb < 2) goto end; *tb++ = (ub & 0x7f) | euc; *tb++ = (*p++ & 0x7f) | euc; } else if (!(*p & 0x80)) { /* broken ? */ if (!euc && cd->cset != CSET_ASCII) { if (te - tb < cd->solen) goto end; memcpy(tb, cd->ShiftOut, cd->solen); tb += cd->solen; cd->cset = CSET_ASCII; } *tb++ = *p++; } else ++p; /* broken */ fb = p; state = JA_EUC_NOSTATE; break; case JA_EUC_SS2: if (0xa0 <= *p && *p <= 0xdf) { /* JIS X 0201-Kana */ if (!euc && cd->cset != CSET_X0208) { if (te - tb < cd->silen) goto end; memcpy(tb, cd->ShiftIn, cd->silen); tb += cd->silen; cd->cset = CSET_X0208; } if (te - tb < 2) goto end; *tb++ = han2zen_tab[*p - 0xa0][0] | euc; *tb++ = han2zen_tab[*p - 0xa0][1] | euc; } fb = ++p; state = JA_EUC_NOSTATE; break; case JA_EUC_SS3: ++p; state = (JA_EUC_SS3 | JA_EUC_MBYTE1); break; case JA_EUC_SS3 | JA_EUC_MBYTE1: fb = ++p; state = JA_EUC_NOSTATE; break; } end: res = tb - (unsigned char *)*to; *to = (char *)tb; *to_left -= res; *from_left = fe - fb; *from = (const char *)fb; return res; } /* * Convert EUC-JP to Shift-JIS */ void put_sjis(uchar ub, uchar lb, char *s) { ub -= 0x20; lb -= 0x20; if ((ub & 1) == 0) lb += 94; ub = ((ub - 1) >> 1) + 0x81; lb += 0x3f; if (ub > 0x9f) ub += 0x40; if (lb > 0x7e) lb++; s[0] = ub; s[1] = lb; } int cConvES(JA_CES_conv_t *cd, const char **from, int *from_left, char **to, int *to_left) { const uchar *fb, *fe, *p; uchar *tb, *te, ub = '\0'; int state = JA_EUC_NOSTATE, res, euc; p = fb = (const uchar *)*from; fe = &fb[*from_left]; tb = (uchar *)*to; te = &tb[*to_left]; euc = *cd->ShiftIn ? 0 : 0x80; while (p < fe && tb < te) switch (state) { case JA_EUC_NOSTATE: if (!(*p & 0x80)) { /* ASCII */ *tb++ = *p++; fb = p; } else if (0xa1 <= *p && *p <= 0xfe) { /* JIS X 0208, 0213-1 */ ub = *p++; state = JA_EUC_MBYTE1; } else if (*p == EUC_SS2_CODE) { /* SS2 + JIS X 0201-Kana */ state = JA_EUC_SS2; ++p; } else if (*p == EUC_SS3_CODE) { /* SS3 + JIS X 0212, 0213-2 */ state = JA_EUC_SS3; ++p; } else fb = ++p; /* broken */ break; case JA_EUC_MBYTE1: if (0xa1 <= *p && *p <= 0xfe) { /* JIS X 0208, 0213-1 */ if (te - tb < 2) goto end; put_sjis(ub & 0x7f, *p & 0x7f, tb); tb += 2; } else if (!(*p & 0x80)) /* broken ? */ *tb++ = *p; fb = ++p; state = JA_EUC_NOSTATE; break; case JA_EUC_SS2: if (0xa0 <= *p && *p <= 0xdf) { /* JIS X 0201-Kana */ if (te - tb < 2) goto end; put_sjis(han2zen_tab[*p - 0xa0][0], han2zen_tab[*p - 0xa0][1], tb); } fb = ++p; state = JA_EUC_NOSTATE; break; case JA_EUC_SS3: state = (JA_EUC_SS3 | JA_EUC_MBYTE1); break; case JA_EUC_SS3 | JA_EUC_MBYTE1: state = JA_EUC_NOSTATE; fb = ++p; break; } end: res = tb - (unsigned char *)*to; *to = (char *)tb; *to_left -= res; *from_left = fe - fb; *from = (const char *)fb; return res; } void JA_CES_find1(const uchar *p, JA_CES_stat_t *st) { if (st->iso != JA_CODE_ERROR && (st->si == '\0' || st->so == '\0')) { switch (ISO_STATE(st->iso)) { case JA_ISO_NOSTATE: if (*p == ESC_CODE) /* ESC sequence */ st->iso = (CODE_STATE(st->iso) | JA_ISO_ESC); break; case JA_ISO_ESC: if (*p == '(') /* ESC ( F */ st->iso = (CODE_STATE(st->iso) | JA_ISO_CS94); else if (*p == '$') /* ESC $ F, ESC $ ( F */ st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS); else st->iso = (CODE_STATE(st->iso) | JA_ISO_NOSTATE); break; case JA_ISO_CS94: if (*p == 'B' || *p == 'J' || *p == 'H') st->so = *p; else if (*p == 'I') st->iso_kana = JA_CODE_OK; st->iso = (CODE_STATE(st->iso) | JA_ISO_NOSTATE); break; case JA_ISO_MBCS: if (*p == '(') { /* ESC $ ( F */ st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_CS94); break; } case JA_ISO_MBCS | JA_ISO_CS94: if (*p == 'B' || *p == '@') st->si = *p; st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_MBYTE1); break; case JA_ISO_MBCS | JA_ISO_MBYTE1: if (*p == ESC_CODE) /* ESC sequence */ st->iso = (CODE_STATE(st->iso) | JA_ISO_ESC); else st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_CS94 | JA_ISO_MBYTE1); break; case JA_ISO_MBCS | JA_ISO_CS94 | JA_ISO_MBYTE1: st->iso = (CODE_STATE(st->iso) | JA_ISO_MBCS | JA_ISO_MBYTE1); break; } if (*p & 0x80) st->iso = JA_CODE_ERROR; } if (st->euc != JA_CODE_ERROR) { switch (EUC_STATE(st->euc)) { case JA_EUC_NOSTATE: if (!(*p & 0x80)) /* ASCII */ ; else if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0208, 0213-1 */ st->euc = (CODE_STATE(st->euc) | JA_EUC_MBYTE1); else if (*p == EUC_SS2_CODE) /* SS2 + JIS X 0201-Kana */ st->euc = (CODE_STATE(st->euc) | JA_EUC_SS2); else if (*p == EUC_SS3_CODE) /* SS3 + JIS X 0212, 0213-2 */ st->euc = (CODE_STATE(st->euc) | JA_EUC_SS3); else st->euc = JA_CODE_ERROR; break; case JA_EUC_MBYTE1: if (CODE_STATE(st->euc) == JA_CODE_NORMAL) st->euc = JA_CODE_OK; case JA_EUC_SS3 | JA_EUC_MBYTE1: if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0208, 0213-1 */ st->euc = (CODE_STATE(st->euc) | JA_EUC_NOSTATE); else if (st->euc & JA_CODE_BROKEN) st->euc = JA_CODE_ERROR; else st->euc = (JA_CODE_BROKEN | JA_EUC_NOSTATE); break; case JA_EUC_SS2: if (0xa0 <= *p && *p <= 0xdf) /* JIS X 0201-Kana */ st->euc = (CODE_STATE(st->euc) | JA_EUC_NOSTATE); else st->euc = JA_CODE_ERROR; break; case JA_EUC_SS3: if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0212, 0213-2 */ st->euc = (CODE_STATE(st->euc) | JA_EUC_SS3 | JA_EUC_MBYTE1); else st->euc = JA_CODE_ERROR; break; } } if (st->sjis != JA_CODE_ERROR) { switch (SJIS_STATE(st->sjis)) { case JA_SJIS_NOSTATE: if (!(*p & 0x80)) /* ASCII */ ; else if (0x81 <= *p && *p <= 0x9f) st->sjis = (CODE_STATE(st->sjis) | JA_SJIS_SHIFT_L); else if (0xe0 <= *p && *p <= 0xef) /* JIS X 0208 */ /* else if (0xe0 <= *p && *p <= 0xfc) */ /* JIS X 0213 */ st->sjis = (CODE_STATE(st->sjis) | JA_SJIS_SHIFT_H); else if (0xa0 == *p) st->sjis = (JA_CODE_BROKEN | JA_SJIS_NOSTATE); else if (0xa1 <= *p && *p <= 0xdf) /* JIS X 0201-Kana */ st->sjis_kana = JA_CODE_OK; else st->sjis = JA_CODE_ERROR; break; case JA_SJIS_SHIFT_L: case JA_SJIS_SHIFT_H: if (CODE_STATE(st->sjis) == JA_CODE_NORMAL) st->sjis = JA_CODE_OK; if ((0x40 <= *p && *p <= 0x7e) || (0x80 <= *p && *p <= 0xfc)) /* JIS X 0208, 0213 */ st->sjis = (CODE_STATE(st->sjis) | JA_SJIS_NOSTATE); else if (st->sjis & JA_CODE_BROKEN) st->sjis = JA_CODE_ERROR; else st->sjis = (JA_CODE_BROKEN | JA_SJIS_NOSTATE); break; } } } void JA_CES_find(const uchar *p, int len, JA_CES_stat_t *st) { const uchar *endp; for (endp = &p[len] ; p < endp ; ++p) { JA_CES_find1(p, st); if (st->euc == JA_CODE_ERROR || st->sjis == JA_CODE_ERROR) break; } } char JA_CES_examin_siso(JA_CES_stat_t *st) { switch (st->si) { case '@': switch (st->so) { case 'H': return CODE_JIS_J; case 'J': return CODE_JIS_j; case 'B': return CODE_JIS_m; default: return CODE_JIS_m; } case 'B': switch (st->so) { case 'J': return CODE_JIS_N; case 'B': return CODE_JIS_n; default: return CODE_JIS_n; } default: switch (st->so) { case 'H': return CODE_JIS_J; case 'J': return CODE_JIS_N; case 'B': return CODE_JIS_n; default: return CODE_JIS_n; } } } char JA_CES_examine_stat(JA_CES_stat_t *st) { if (st->iso != JA_CODE_ERROR) { if (st->si == '\0' && st->so == '\0' && st->iso_kana != JA_CODE_OK) return '\0'; return JA_CES_examin_siso(st); } if (st->hint == CODE_EUC) { if (st->euc != JA_CODE_ERROR) return CODE_EUC; } else if (st->hint == CODE_SJIS) { if (st->sjis != JA_CODE_ERROR) return CODE_SJIS; } if (CODE_STATE(st->euc) == JA_CODE_OK) return CODE_EUC; if (CODE_STATE(st->sjis) == JA_CODE_OK) return CODE_SJIS; return CODE_UNKNOWN; } char checkShiftCode(Str buf, uchar hint) { JA_CES_stat_t st; if (hint == CODE_INNER_EUC) return '\0'; JA_CES_stat_init(&st, hint); JA_CES_find((const unsigned char *)buf->ptr, buf->length, &st); if ((st.found = JA_CES_examine_stat(&st)) != CODE_UNKNOWN) return st.found; if (CODE_STATE(st.euc) == JA_CODE_NORMAL) return CODE_EUC; if (CODE_STATE(st.sjis) == JA_CODE_NORMAL) return CODE_SJIS; return CODE_EUC; } #elif defined(MANY_CHARSET) #include "Str.h" mb_cs_detector_t *conv_cs_detector = NULL; mb_setup_t conv_mb_setup_r = {}; mb_setup_t conv_mb_setup_w = {}; mb_ws_conv_t *input_converters = NULL; mb_ws_conv_t *output_converters = NULL; void conv_init_r(const char *lang, const char *ics, const char *op, ...) { mb_cs_detector_stat_t statv[MB_CS_DETECT_CHOICEMAX]; size_t nstats; va_list ap; va_start(ap, op); if (lang && mb_lang_to_detector(lang, statv, &nstats)) { conv_cs_detector = New(mb_cs_detector_t); bzero(conv_cs_detector, sizeof(*conv_cs_detector)); if (ics) { mb_info_t dummy = {}; mb_ces_by_name(ics, &dummy); if (!(dummy.flag & MB_FLAG_UNKNOWNCS)) { mb_ces_t *ces = dummy.ces; size_t i, j; conv_cs_detector->stat[0].ces = ces; for (i = 1, j = 0 ; j < nstats && i < MB_CS_DETECT_CHOICEMAX ; ++j) if (statv[j].ces != ces) conv_cs_detector->stat[i++].ces = statv[j].ces; conv_cs_detector->nstats = i; goto end; } } memcpy(conv_cs_detector->stat, statv, sizeof(mb_cs_detector_stat_t) * nstats); conv_cs_detector->nstats = nstats; } else conv_cs_detector = NULL; end: mb_vsetsetup(&conv_mb_setup_r, op, ap); va_end(ap); } void conv_setup_r(const char *op, ...) { va_list ap; va_start(ap, op); mb_vsetsetup(&conv_mb_setup_r, op, ap); va_end(ap); } void conv_init_w(const char *op, ...) { va_list ap; va_start(ap, op); mb_vsetsetup(&conv_mb_setup_w, op, ap); va_end(ap); } void conv_setup_w(const char *op, ...) { va_list ap; va_start(ap, op); mb_vsetsetup(&conv_mb_setup_w, op, ap); va_end(ap); } static Str conv_apply_convv_cat(Str d, mb_wchar_t *wb, mb_wchar_t *we, char *b, char *e, char *s) { char mbs[BUFSIZ], *p; for (p = mbs ; wb < we ; ++wb) p += mb_wchar_to_mbc(*wb, p); if (d) Strcat_charp_n(d, mbs, p - mbs); else if (p - mbs != e - b || memcmp(mbs, b, p - mbs)) { d = Strnew_size(b - s + p - mbs); Strcat_charp_n(d, s, b - s); Strcat_charp_n(d, mbs, p - mbs); } return d; } char * conv_apply_convv(char *s, int *p_n, mb_ws_conv_t *cv, mb_info_t *info) { Str d = NULL; char *p, *ep, *q; mb_wchar_t ws[BUFSIZ / MB_MBC_LEN_MAX], *ewp; if (!cv) cv = input_converters; for (p = q = s, ep = p + *p_n, ewp = ws ; p < ep ;) { ewp = ws; p = (char *)mb_mem_to_wstr(p, ep, &ewp, ws + sizeof(ws) / sizeof(ws[0])); if (cv) mb_apply_convv(ws, ewp, cv, info); d = conv_apply_convv_cat(d, ws, ewp, q, p, s); q = p; } if (d) { *p_n = d->length; return d->ptr; } else return s; } size_t conv_ucs2mb(mb_wchar_t wc, char *buf) { mb_info_t info; mb_setup_t copy = conv_mb_setup_r; mb_setsetup(©, "@", "utf-8"); mb_mem2mb_setup(&info, NULL, 0, ©, ""); if (input_converters) mb_apply_convv(&wc, &wc + 1, input_converters, &info); return mb_wchar_to_mbc(wc, buf); } static Str conv_info2mbStr(mb_info_t *info, size_t n, Str s) { Str d = s ? NULL : Strnew_size(n); mb_wchar_t wc, ws[BUFSIZ / MB_MBC_LEN_MAX]; void *ewp; size_t i, nw = 0; do { i = info->b; ewp = ws; wc = mb_cs_detect_encode(info, MB_ENCODE_TO_WS | MB_ENCODE_SKIP_INVALID | MB_ENCODE_SKIP_SHORT, &ewp, ws + sizeof(ws) / sizeof(ws[0])); if ((mb_wchar_t *)ewp > ws) { nw += (mb_wchar_t *)ewp - ws; if (input_converters) mb_apply_convv(ws, ewp, input_converters, info); d = conv_apply_convv_cat(d, ws, ewp, &info->buf[i], &info->buf[info->b], info->buf); } } while (wc != mb_notchar_eof); return d ? d : (nw || !s->length) ? s : Strnew_size(0); } Str conv_vmem2mbStr(const char *s, size_t n, Str S, const char **p_cs, const char *op, va_list ap) { if (s && n) { mb_info_t info; mb_setup_t copy = conv_mb_setup_r; copy.cs = NULL; mb_vsetsetup(©, op, ap); if (!copy.cs && p_cs && *p_cs) copy.cs = *p_cs; if (!copy.cs) copy.cs = conv_mb_setup_r.cs; memset(&info, 0, sizeof(info)); mb_mem2mb_setup(&info, s, n, ©, "|", MB_FLAG_DONTFLUSH_BUFFER); if (info.flag & MB_FLAG_UNKNOWNCS && copy.cs && conv_mb_setup_r.cs) mb_ces_by_name(conv_mb_setup_r.cs, &info); else if (conv_cs_detector && !copy.cs) mb_bind_cs_detector(conv_cs_detector, &info); S = conv_info2mbStr(&info, n, S); if (p_cs && !*p_cs) *p_cs = info.ces->namev[0]; return S; } else return Strnew(); } Str conv_mem2mbStr(const char *s, size_t n, const char **p_cs, const char *op, ...) { va_list ap; Str d; va_start(ap, op); d = conv_vmem2mbStr(s, n, NULL, p_cs, op, ap); va_end(ap); return d; } Str conv_vstr2mbStr(const char *s, const char **p_cs, const char *op, va_list ap) { return conv_vmem2mbStr(s, s ? strlen(s) : 0, NULL, p_cs, op, ap); } Str conv_str2mbStr(const char *s, const char **p_cs, const char *op, ...) { va_list ap; Str d; va_start(ap, op); d = conv_vstr2mbStr(s, p_cs, op, ap); va_end(ap); return d; } Str conv_Str2mbStr(Str s, const char **p_cs, const char *op, ...) { va_list ap; Str d; va_start(ap, op); d = conv_vmem2mbStr(s->ptr, s->length, s, p_cs, op, ap); va_end(ap); return d; } size_t conv_Str_write(const char *s, size_t n, void *ap) { conv_Str_write_t *p = ap; p->d->length = p->info.e; Strassure(p->d, 1); p->info.buf = p->d->ptr; p->info.size = p->d->area_size; return 0; } size_t conv_default_decoder(mb_wchar_t enc, mb_info_t *info) { if (enc >= MB_WORD_ENC(mb_SBC, MB_CTL_FC, 0) && enc <= MB_WORD_ENC(mb_SBC, MB_CTL_FC, MB_SBC_UNIT - 1U)) { mb_store_octet((enc - MB_WORD_ENC(mb_SBC, MB_CTL_FC, 0)) | 0x80, info); return 1; } else return 0; } Str conv_vmem2isoStr(const char *s, size_t n, const char *op, va_list ap) { if (s && n) { conv_Str_write_t arg = {}; mb_wchar_t ws[BUFSIZ], *ewp; size_t i; int cn; arg.d = Strnew_size(n); mb_vinit_w(&arg.info, &arg, conv_Str_write, &conv_mb_setup_w, op, ap); arg.info.flag |= MB_FLAG_DONTFLUSH_BUFFER; arg.info.buf = arg.d->ptr; arg.info.size = arg.d->area_size; for (ewp = ws, i = 0 ; i < n ;) { if ((cn = mb_mem_to_wchar_internal(&s[i], n - i, *ewp)) > 0) i += cn; else ++i; if (++ewp >= ws + sizeof(ws) / sizeof(ws[0])) { mb_apply_convv(ws, ewp, output_converters, &arg.info); mb_decode(ws, ewp, &arg.info); ewp = ws; } } if (ewp > ws) { mb_apply_convv(ws, ewp, output_converters, &arg.info); mb_decode(ws, ewp, &arg.info); } mb_store_char_noconv(EOF, &arg.info); arg.d->length = arg.info.e; Strassure(arg.d, 1); arg.d->ptr[arg.d->length] = '\0'; return arg.d; } else return Strnew(); } Str conv_mem2isoStr(const char *s, size_t n, const char *op, ...) { va_list ap; Str d; va_start(ap, op); d = conv_vmem2isoStr(s, n, op, ap); va_end(ap); return d; } Str conv_vstr2isoStr(const char *s, const char *op, va_list ap) { return conv_vmem2isoStr(s, s ? strlen(s) : 0, op, ap); } Str conv_str2isoStr(const char *s, const char *op, ...) { va_list ap; Str d; va_start(ap, op); d = conv_vstr2isoStr(s, op, ap); va_end(ap); return d; } #endif /* JP_CHARSET */