w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

johab.c (9496B)


      1 
      2 #include "wc.h"
      3 #include "johab.h"
      4 #include "wtf.h"
      5 #ifdef USE_UNICODE
      6 #include "ucs.h"
      7 #endif
      8 
      9 #define C0 WC_JOHAB_MAP_C0
     10 #define GL WC_JOHAB_MAP_GL
     11 #define C1 WC_JOHAB_MAP_C1
     12 #define GH WC_JOHAB_MAP_GH
     13 #define GB WC_JOHAB_MAP_GB
     14 #define JJ WC_JOHAB_MAP_JJ
     15 #define JB WC_JOHAB_MAP_JB
     16 #define HB WC_JOHAB_MAP_HB
     17 #define CJ WC_JOHAB_MAP_CJ
     18 #define CB WC_JOHAB_MAP_CB
     19 
     20 /*
     21   00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF
     22   C0    GL    GL    GL    C0 -  -     J     J     -     H     -  H     -     -
     23   -     -     J     B     -  -  J     J     B     B     B     B  B     B     -
     24 
     25   C0    GL    GH    GB    C0 C1 CJ    JJ    JB    CB    HB    CB HB    CB    C1 
     26 */
     27 
     28 wc_uint8 WC_JOHAB_MAP[ 0x100 ] = {
     29     C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
     30     C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
     31 /*  20 */
     32     GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
     33 /*  30  31 */
     34     GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH,
     35 /*  40  41 */
     36     GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
     37     GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
     38     GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 
     39     GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0,
     40 
     41 /*  80          83  84 */
     42     C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ,
     43 /*  90  91 */
     44     JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
     45     JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
     46     JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
     47     JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 
     48 /*              D3  D4          D7  D8                          DF */
     49     JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB, 
     50     HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB,
     51 /*                                      F9  FA              FE  FF */
     52     HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1,
     53 };
     54 
     55 static wc_uint8 johab1_N_map[ 3 ][ 32 ] = {
     56   { 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
     57    15,16,17,18,19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
     58   { 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9,10,11,
     59     0, 0,12,13,14,15,16,17, 0, 0,18,19,20,21, 0, 0 },
     60   { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
     61    16,17, 0,18,19,20,21,22,23,24,25,26,27,28, 0, 0 }
     62 };
     63 
     64 static wc_uint8 N_johab1_map[ 3 ][ 32 ] = {
     65   { 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,
     66    18,19,20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
     67   { 3, 4, 5, 6, 7,10,11,12,13,14,15,18,19,20,21,22,
     68    23,26,27,28,29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
     69   { 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,
     70    17,19,20,21,22,23,24,25,26,27,28,29, 0, 0, 0, 0 }
     71 };
     72 
     73 wc_wchar_t
     74 wc_johab_to_ksx1001(wc_wchar_t cc)
     75 {
     76 #ifdef USE_UNICODE
     77     static wc_table *t = NULL;
     78 #endif
     79 
     80     switch (cc.ccs) {
     81     case WC_CCS_JOHAB:
     82 	return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc));
     83     case WC_CCS_JOHAB_1:
     84     case WC_CCS_JOHAB_2:
     85 #ifdef USE_UNICODE
     86 	if (WcOption.ucs_conv) {
     87 	    if (t == NULL)
     88 		t = wc_get_ucs_table(WC_CCS_KS_X_1001);
     89 	    cc = wc_any_to_any(cc, t);
     90 	} else
     91 #endif
     92 	    cc.ccs = WC_CCS_UNKNOWN_W;
     93 	break;
     94     case WC_CCS_JOHAB_3:
     95 	if (cc.code >= 0x2121)
     96 	    cc.ccs = WC_CCS_KS_X_1001;
     97 	else
     98 	    cc.ccs = WC_CCS_UNKNOWN_W;
     99 	break;
    100     }
    101     return cc;
    102 }
    103 
    104 wc_wchar_t
    105 wc_ksx1001_to_johab(wc_wchar_t cc)
    106 {
    107     cc.code &= 0x7f7f;
    108     if ((cc.code >= 0x2121 && cc.code <  0x2421) ||
    109 	(cc.code >  0x2453 && cc.code <= 0x2C7E) ||
    110 	(cc.code >= 0x4A21 && cc.code <= 0x7D7E)) {
    111 	cc.ccs = WC_CCS_JOHAB_3;
    112 	return cc;
    113     }
    114 #ifdef USE_UNICODE
    115     if (WcOption.ucs_conv)
    116 	cc = wc_ucs_to_johab(wc_any_to_ucs(cc));
    117     else
    118 #endif
    119 	cc.ccs = WC_CCS_UNKNOWN_W;
    120     return cc;
    121 }
    122 
    123 #ifdef USE_UNICODE
    124 wc_wchar_t
    125 wc_ucs_to_johab(wc_uint32 ucs)
    126 {
    127     wc_table *t;
    128     wc_wchar_t cc;
    129 
    130     if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) {
    131 	ucs -= WC_C_UCS2_HANGUL;
    132 	cc.code = WC_N_JOHAB1(ucs);
    133 	cc.ccs = WC_CCS_JOHAB;
    134     } else if (ucs >= 0x3131 && ucs <= 0x3163) {
    135 	t = wc_get_ucs_table(WC_CCS_JOHAB_2);
    136 	cc = wc_ucs_to_any(ucs, t);
    137     } else {
    138 	t = wc_get_ucs_table(WC_CCS_JOHAB_3);
    139 	cc = wc_ucs_to_any(ucs, t);
    140     }
    141     return cc;
    142 }
    143 #endif
    144 
    145 wc_uint32
    146 wc_johab1_to_N(wc_uint32 code)
    147 {
    148     wc_uint32 a, b, c;
    149 
    150     a = johab1_N_map[0][(code >> 10) & 0x1F];
    151     b = johab1_N_map[1][(code >> 5)  & 0x1F];
    152     c = johab1_N_map[2][ code        & 0x1F];
    153     if (a && b && c)
    154 	return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1);
    155     return WC_C_JOHAB_ERROR;
    156 }
    157 
    158 wc_uint32
    159 wc_N_to_johab1(wc_uint32 code)
    160 {
    161     wc_uint32 a, b, c;
    162 
    163     a = N_johab1_map[0][(code / 28) / 21];
    164     b = N_johab1_map[1][(code / 28) % 21];
    165     c = N_johab1_map[2][ code % 28      ];
    166     return 0x8000 | (a << 10) | (b << 5) | c;
    167 }
    168 
    169 /* 0x1F21 - 0x2C7E, 0x4A21 - 0x7C7E
    170   (0x1F21 - 0x207E are not in KS X 1001) */
    171 #define johab3_to_ksx1001(ub, lb) \
    172 { \
    173     if (ub < 0xe0) { \
    174 	ub = ((ub - 0xd8) << 1) + 0x1f; \
    175     } else { \
    176 	ub = ((ub - 0xe0) << 1) + 0x4a; \
    177     } \
    178     if (lb < 0xa1) { \
    179 	lb -= (lb < 0x91) ? 0x10 : 0x22; \
    180     } else { \
    181 	ub++; \
    182 	lb -= 0x80; \
    183     } \
    184 }
    185 
    186 #define ksx1001_to_johab3(ub, lb) \
    187 { \
    188     if (ub < 0x4a) { \
    189 	ub -= 0x1f; \
    190 	lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \
    191 	ub = (ub >> 1) + 0xd8; \
    192     } else { \
    193 	ub -= 0x4a; \
    194 	lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \
    195 	ub = (ub >> 1) + 0xe0; \
    196     } \
    197 }
    198 
    199 wc_wchar_t
    200 wc_johab_to_cs128w(wc_wchar_t cc)
    201 {
    202     wc_uint32 n;
    203     wc_uchar ub, lb;
    204 
    205     if (cc.code < 0xD800) {
    206 	n = WC_JOHAB1_N(cc.code);
    207 	if (n != WC_C_JOHAB_ERROR) {
    208 	    cc.code = WC_N_CS94x128(n);
    209 	    cc.ccs = WC_CCS_JOHAB_1;
    210 	} else {
    211 	    n = WC_JOHAB2_N(cc.code);
    212 	    cc.code = WC_N_CS128W(n);
    213 	    cc.ccs = WC_CCS_JOHAB_2;
    214 	}
    215     } else {
    216 	ub = cc.code >> 8;
    217 	lb = cc.code & 0xff;
    218 	johab3_to_ksx1001(ub, lb);
    219 	cc.code = ((wc_uint32)ub << 8) | lb;
    220 	cc.ccs = WC_CCS_JOHAB_3;
    221     }
    222     return cc;
    223 }
    224 
    225 wc_wchar_t
    226 wc_cs128w_to_johab(wc_wchar_t cc)
    227 {
    228     wc_uint32 n;
    229     wc_uchar ub, lb;
    230 
    231     switch (cc.ccs) {
    232     case WC_CCS_JOHAB_1:
    233 	n = WC_CS94x128_N(cc.code);
    234 	cc.code = WC_N_JOHAB1(n);
    235 	break;
    236     case WC_CCS_JOHAB_2:
    237 	n = WC_CS128W_N(cc.code);
    238 	cc.code = WC_N_JOHAB2(n);
    239 	break;
    240     case WC_CCS_JOHAB_3:
    241 	ub = (cc.code >> 8) & 0x7f;
    242 	lb = cc.code & 0x7f;
    243 	ksx1001_to_johab3(ub, lb);
    244 	cc.code = ((wc_uint32)ub << 8) | lb;
    245     }
    246     cc.ccs = WC_CCS_JOHAB;
    247     return cc;
    248 }
    249 
    250 Str
    251 wc_conv_from_johab(Str is, wc_ces ces)
    252 {
    253     Str os;
    254     wc_uchar *sp = (wc_uchar *)is->ptr;
    255     wc_uchar *ep = sp + is->length;
    256     wc_uchar *p;
    257     int state = WC_JOHAB_NOSTATE;
    258 
    259     for (p = sp; p < ep && *p < 0x80; p++)
    260         ;
    261     if (p == ep)
    262 	return is;
    263     os = Strnew_size(is->length);
    264     if (p > sp)
    265 	Strcat_charp_n(os, is->ptr, (int)(p - sp));
    266 
    267     for (; p < ep; p++) {
    268 	switch (state) {
    269 	case WC_JOHAB_NOSTATE:
    270 	    switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) {
    271 	    case WC_JOHAB_MAP_UJ:
    272 		state = WC_JOHAB_HANGUL1;
    273 		break;
    274 	    case WC_JOHAB_MAP_UH:
    275 		state = WC_JOHAB_HANJA1;
    276 		break;
    277 	    case WC_JOHAB_MAP_C1:
    278 		wtf_push_unknown(os, p, 1);
    279 		break;
    280 	    default:
    281 		Strcat_char(os, (char)*p);
    282 		break;
    283 	    }
    284 	    break;
    285 	case WC_JOHAB_HANGUL1:
    286 	    if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) 
    287 		wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
    288 	    else
    289 		wtf_push_unknown(os, p-1, 2);
    290 	    state = WC_JOHAB_NOSTATE;
    291 	    break;
    292 	case WC_JOHAB_HANJA1:
    293 	    if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH)
    294 		wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p);
    295 	    else
    296 		wtf_push_unknown(os, p-1, 2);
    297 	    state = WC_JOHAB_NOSTATE;
    298 	    break;
    299 	}
    300     }
    301     switch (state) {
    302     case WC_JOHAB_HANGUL1:
    303     case WC_JOHAB_HANJA1:
    304 	wtf_push_unknown(os, p-1, 1);
    305 	break;
    306     }
    307     return os;
    308 }
    309 
    310 void
    311 wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st)
    312 {
    313   while (1) {
    314     switch (cc.ccs) {
    315     case WC_CCS_US_ASCII:
    316 	Strcat_char(os, (char)cc.code);
    317 	return;
    318     case WC_CCS_JOHAB_1:
    319     case WC_CCS_JOHAB_2:
    320     case WC_CCS_JOHAB_3:
    321 	cc = wc_cs128w_to_johab(cc);
    322     case WC_CCS_JOHAB:
    323 	Strcat_char(os, (char)(cc.code >> 8));
    324 	Strcat_char(os, (char)(cc.code & 0xff));
    325 	return;
    326     case WC_CCS_KS_X_1001:
    327 	cc = wc_ksx1001_to_johab(cc);
    328 	continue;
    329     case WC_CCS_UNKNOWN_W:
    330 	if (!WcOption.no_replace)
    331 	    Strcat_charp(os, WC_REPLACE_W);
    332 	return;
    333     case WC_CCS_UNKNOWN:
    334 	if (!WcOption.no_replace)
    335 	    Strcat_charp(os, WC_REPLACE);
    336 	return;
    337     default:
    338 #ifdef USE_UNICODE
    339 	if (WcOption.ucs_conv)
    340 	    cc = wc_any_to_any_ces(cc, st);
    341 	else
    342 #endif
    343 	    cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    344 	continue;
    345     }
    346   }
    347 }
    348 
    349 Str
    350 wc_char_conv_from_johab(wc_uchar c, wc_status *st)
    351 {
    352     static Str os;
    353     static wc_uchar johabu;
    354 
    355     if (st->state == -1) {
    356 	st->state = WC_JOHAB_NOSTATE;
    357 	os = Strnew_size(8);
    358     }
    359 
    360     switch (st->state) {
    361     case WC_JOHAB_NOSTATE:
    362 	switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) {
    363 	case WC_JOHAB_MAP_UJ:
    364 	    johabu = c;
    365 	    st->state = WC_JOHAB_HANGUL1;
    366 	    return NULL;
    367 	case WC_JOHAB_MAP_UH:
    368 	    johabu = c;
    369 	    st->state = WC_JOHAB_HANJA1;
    370 	    return NULL;
    371 	case WC_JOHAB_MAP_C1:
    372 	    break;
    373 	default:
    374 	    Strcat_char(os, (char)c);
    375 	    break;
    376 	}
    377 	break;
    378     case WC_JOHAB_HANGUL1:
    379 	if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ)
    380 	    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
    381 	break;
    382     case WC_JOHAB_HANJA1:
    383 	if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH)
    384 	    wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
    385 	break;
    386     }
    387     st->state = -1;
    388     return os;
    389 }