iso2022.c (24593B)
1 2 #include "wc.h" 3 #include "iso2022.h" 4 #include "jis.h" 5 #include "big5.h" 6 #include "johab.h" 7 #include "wtf.h" 8 #ifdef USE_UNICODE 9 #include "ucs.h" 10 #endif 11 12 #define C0 WC_ISO_MAP_C0 13 #define C1 WC_ISO_MAP_C1 14 #define GL WC_ISO_MAP_GL 15 #define GR WC_ISO_MAP_GR 16 #define GL2 WC_ISO_MAP_GL96 17 #define GR2 WC_ISO_MAP_GR96 18 #define SO WC_ISO_MAP_SO 19 #define SI WC_ISO_MAP_SI 20 #define ESC WC_ISO_MAP_ESC 21 #define SS2 WC_ISO_MAP_SS2 22 #define SS3 WC_ISO_MAP_SS3 23 24 wc_uint8 WC_ISO_MAP[ 0x100 ] = { 25 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, SO, SI, 26 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, ESC,C0, C0, C0, C0, 27 GL2,GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 28 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 29 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 30 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 31 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 32 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL2, 33 34 C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, SS2,SS3, 35 C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, 36 GR2,GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, 37 GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, 38 GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, 39 GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, 40 GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, 41 GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR2, 42 }; 43 44 static wc_uchar cs94_gmap[ 0x80 - WC_F_ISO_BASE ]; 45 static wc_uchar cs94w_gmap[ 0x80 - WC_F_ISO_BASE ]; 46 static wc_uchar cs96_gmap[ 0x80 - WC_F_ISO_BASE ]; 47 static wc_uchar cs96w_gmap[ 0x80 - WC_F_ISO_BASE ]; 48 static wc_uchar cs942_gmap[ 0x80 - WC_F_ISO_BASE ]; 49 50 static void 51 wtf_push_iso2022(Str os, wc_ccs ccs, wc_uint32 code) 52 { 53 switch (ccs) { 54 case WC_CCS_JIS_C_6226: 55 case WC_CCS_JIS_X_0208: 56 case WC_CCS_JIS_X_0213_1: 57 ccs = wc_jisx0208_or_jisx02131(code); 58 break; 59 case WC_CCS_JIS_X_0212: 60 case WC_CCS_JIS_X_0213_2: 61 ccs = wc_jisx0212_or_jisx02132(code); 62 break; 63 case WC_CCS_JIS_X_0201: 64 case WC_CCS_GB_1988: 65 ccs = WC_CCS_US_ASCII; 66 break; 67 } 68 wtf_push(os, ccs, code); 69 } 70 71 Str 72 wc_conv_from_iso2022(Str is, wc_ces ces) 73 { 74 Str os; 75 wc_uchar *sp = (wc_uchar *)is->ptr; 76 wc_uchar *ep = sp + is->length; 77 wc_uchar *p, *q = NULL; 78 int state = WC_ISO_NOSTATE; 79 wc_status st; 80 wc_ccs gl_ccs, gr_ccs; 81 82 for (p = sp; p < ep && !(WC_ISO_MAP[*p] & WC_ISO_MAP_DETECT); p++) 83 ; 84 if (p == ep) 85 return is; 86 os = Strnew_size(is->length); 87 if (p > sp) 88 Strcat_charp_n(os, is->ptr, (int)(p - sp)); 89 90 wc_input_init(ces, &st); 91 gl_ccs = st.design[st.gl]; 92 gr_ccs = st.design[st.gr]; 93 94 for (; p < ep; p++) { 95 switch (state) { 96 case WC_ISO_NOSTATE: 97 switch (WC_ISO_MAP[*p]) { 98 case GL2: 99 gl_ccs = st.ss ? st.design[st.ss] 100 : st.design[st.gl]; 101 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) { 102 Strcat_char(os, (char)*p); 103 break; 104 } 105 case GL: 106 gl_ccs = st.ss ? st.design[st.ss] 107 : st.design[st.gl]; 108 if (WC_CCS_IS_WIDE(gl_ccs)) { 109 q = p; 110 state = WC_ISO_MBYTE1; 111 continue; 112 } else if (gl_ccs == WC_CES_US_ASCII) 113 Strcat_char(os, (char)*p); 114 else 115 wtf_push_iso2022(os, gl_ccs, (wc_uint32)*p); 116 break; 117 case GR2: 118 gr_ccs = st.ss ? st.design[st.ss] 119 : st.design[st.gr]; 120 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) { 121 wtf_push_unknown(os, p, 1); 122 break; 123 } 124 case GR: 125 gr_ccs = st.ss ? st.design[st.ss] 126 : st.design[st.gr]; 127 if (WC_CCS_IS_WIDE(gr_ccs)) { 128 q = p; 129 state = WC_EUC_MBYTE1; 130 continue; 131 } else if (gr_ccs) 132 wtf_push_iso2022(os, gr_ccs, (wc_uint32)*p); 133 else 134 wtf_push_unknown(os, p, 1); 135 break; 136 case C0: 137 Strcat_char(os, (char)*p); 138 break; 139 case C1: 140 wtf_push(os, WC_CCS_C1, (wc_uint32)*p); 141 break; 142 case ESC: 143 st.ss = 0; 144 if (wc_parse_iso2022_esc(&p, &st)) 145 state = st.state; 146 else 147 Strcat_char(os, (char)*p); 148 continue; 149 case SI: 150 st.gl = 0; 151 break; 152 case SO: 153 st.gl = 1; 154 break; 155 case SS2: 156 if (! st.design[2]) { 157 wtf_push_unknown(os, p, 1); 158 break; 159 } 160 st.ss = 2; 161 continue; 162 case SS3: 163 if (! st.design[3]) { 164 wtf_push_unknown(os, p, 1); 165 break; 166 } 167 st.ss = 3; 168 continue; 169 } 170 break; 171 case WC_ISO_MBYTE1: 172 switch (WC_ISO_MAP[*p]) { 173 case GL2: 174 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) { 175 Strcat_char(os, (char)*q); 176 Strcat_char(os, (char)*p); 177 break; 178 } 179 case GL: 180 wtf_push_iso2022(os, gl_ccs, ((wc_uint32)*q << 8) | *p); 181 break; 182 default: 183 wtf_push_unknown(os, q, 2); 184 break; 185 } 186 break; 187 case WC_EUC_MBYTE1: 188 switch (WC_ISO_MAP[*p]) { 189 case GR2: 190 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) { 191 wtf_push_unknown(os, q, 2); 192 break; 193 } 194 case GR: 195 if (gr_ccs == WC_CCS_CNS_11643_X) { 196 state = WC_EUC_TW_MBYTE2; 197 continue; 198 } 199 wtf_push_iso2022(os, gr_ccs, ((wc_uint32)*q << 8) | *p); 200 break; 201 default: 202 wtf_push_unknown(os, q, 2); 203 break; 204 } 205 break; 206 case WC_EUC_TW_MBYTE2: 207 if (WC_ISO_MAP[*p] == GR) { 208 if (0xa1 <= *q && *q <= 0xa7) { 209 wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (*q - 0xa1), 210 ((wc_uint32)*(q+1) << 8) | *p); 211 break; 212 } 213 if (0xa8 <= *q && *q <= 0xb0) { 214 wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (*q - 0xa8), 215 ((wc_uint32)*(q+1) << 8) | *p); 216 break; 217 } 218 } 219 wtf_push_unknown(os, q, 3); 220 break; 221 case WC_ISO_CSWSR: 222 if (*p == WC_C_ESC && *(p+1) == WC_C_CSWSR) { 223 if (*(p+2) == WC_F_ISO_BASE) { 224 state = st.state = WC_ISO_NOSTATE; 225 p += 2; 226 continue; 227 } else if (*(p+2) > WC_F_ISO_BASE && *(p+2) <= 0x7e) { 228 p += 2; 229 continue; 230 } 231 } 232 wtf_push_unknown(os, p, 1); 233 continue; 234 case WC_ISO_CSWOSR: 235 wtf_push_unknown(os, p, ep - p); 236 return os; 237 break; 238 } 239 st.ss = 0; 240 state = WC_ISO_NOSTATE; 241 } 242 switch (state) { 243 case WC_ISO_MBYTE1: 244 case WC_EUC_MBYTE1: 245 wtf_push_unknown(os, p-1, 1); 246 break; 247 case WC_EUC_TW_MBYTE1: 248 wtf_push_unknown(os, p-2, 2); 249 break; 250 } 251 return os; 252 } 253 254 int 255 wc_parse_iso2022_esc(wc_uchar **ptr, wc_status *st) 256 { 257 wc_uchar *p = *ptr, state, f = 0, g = 0, cs = 0; 258 259 if (*p != WC_C_ESC) 260 return 0; 261 state = *p; 262 for (p++; *p && state; p++) { 263 switch (state) { 264 case WC_C_ESC: /* ESC */ 265 switch (*p) { 266 case WC_C_MBCS: /* ESC '$' */ 267 state = *p; 268 continue; 269 case WC_C_G0_CS94: /* ESC '(' */ 270 case WC_C_G1_CS94: /* ESC ')' */ 271 case WC_C_G2_CS94: /* ESC '*' */ 272 case WC_C_G3_CS94: /* ESC '+' */ 273 state = cs = WC_C_G0_CS94; 274 g = *p & 0x03; 275 continue; 276 case WC_C_G0_CS96: /* ESC ',' */ /* ISO 2022 does not permit */ 277 case WC_C_G1_CS96: /* ESC '-' */ 278 case WC_C_G2_CS96: /* ESC '.' */ 279 case WC_C_G3_CS96: /* ESC '/' */ 280 state = cs = WC_C_G0_CS96; 281 g = *p & 0x03; 282 continue; 283 case WC_C_C0: /* ESC '!' */ /* not suported */ 284 case WC_C_C1: /* ESC '"' */ /* not suported */ 285 case WC_C_REP: /* ESC '&' */ /* not suported */ 286 state = cs = WC_C_C0; 287 continue; 288 case WC_C_CSWSR: /* ESC '%' */ /* not suported */ 289 state = cs = WC_C_CSWSR; 290 continue; 291 case WC_C_SS2: /* ESC 'N' */ 292 st->ss = 2; *ptr = p; return 1; 293 case WC_C_SS3: /* ESC 'O' */ 294 st->ss = 3; *ptr = p; return 1; 295 case WC_C_LS2: /* ESC 'n' */ 296 st->gl = 2; *ptr = p; return 1; 297 case WC_C_LS3: /* ESC 'o' */ 298 st->gl = 3; *ptr = p; return 1; 299 case WC_C_LS1R: /* ESC '~' */ 300 st->gr = 1; *ptr = p; return 1; 301 case WC_C_LS2R: /* ESC '}' */ 302 st->gr = 2; *ptr = p; return 1; 303 case WC_C_LS3R: /* ESC '|' */ 304 st->gr = 3; *ptr = p; return 1; 305 default: 306 return 0; 307 } 308 break; 309 case WC_C_MBCS: /* ESC '$' */ 310 switch (*p) { 311 case WC_F_JIS_C_6226: /* ESC '$' @ */ 312 case WC_F_JIS_X_0208: /* ESC '$' B */ 313 case WC_F_GB_2312: /* ESC '$' A */ 314 state = 0; 315 cs = WC_C_G0_CS94 | 0x80; 316 g = 0; 317 f = *p; 318 break; 319 case WC_C_G0_CS94: /* ESC '$' '(' */ 320 case WC_C_G1_CS94: /* ESC '$' ')' */ 321 case WC_C_G2_CS94: /* ESC '$' '*' */ 322 case WC_C_G3_CS94: /* ESC '$' '+' */ 323 state = cs = WC_C_G0_CS94 | 0x80; 324 g = *p & 0x03; 325 continue; 326 case WC_C_G0_CS96: /* ESC '$' ',' */ /* ISO 2022 does not permit */ 327 case WC_C_G1_CS96: /* ESC '$' '-' */ 328 case WC_C_G2_CS96: /* ESC '$' '.' */ 329 case WC_C_G3_CS96: /* ESC '$' '/' */ 330 state = cs = WC_C_G0_CS96 | 0x80; 331 g = *p & 0x03; 332 continue; 333 default: 334 return 0; 335 } 336 break; 337 case WC_C_G0_CS94: /* ESC [()*+] F */ 338 if (*p == WC_C_CS942) { /* ESC [()*+] '!' */ 339 state = cs = WC_C_CS942 | 0x80; 340 g = *p & 0x03; 341 continue; 342 } 343 case WC_C_G0_CS96: /* ESC [,-./] F */ 344 case WC_C_G0_CS94 | 0x80: /* ESC '$' [()*+] F */ 345 case WC_C_G0_CS96 | 0x80: /* ESC '$' [,-./] F */ 346 case WC_C_CS942 | 0x80: /* ESC [()*+] '!' F */ 347 case WC_C_C0: /* ESC [!"&] F */ 348 case WC_C_CSWSR | 0x80: /* ESC '%' '/' F */ 349 state = 0; 350 f = *p; 351 break; 352 case WC_C_CSWSR: /* ESC '%' F */ 353 if (*p == WC_C_CSWOSR) { /* ESC '%' '/' */ 354 state = cs = WC_C_CSWSR | 0x80; 355 continue; 356 } 357 state = 0; 358 f = *p; 359 break; 360 default: 361 return 0; 362 } 363 } 364 if (f < WC_F_ISO_BASE || f > 0x7e) 365 return 0; 366 switch (cs) { 367 case WC_C_G0_CS94: 368 st->design[g] = WC_CCS_SET_CS94(f); 369 break; 370 case WC_C_G0_CS94 | 0x80: 371 st->design[g] = WC_CCS_SET_CS94W(f); 372 break; 373 case WC_C_G0_CS96: 374 st->design[g] = WC_CCS_SET_CS96(f); 375 break; 376 case WC_C_G0_CS96 | 0x80: 377 st->design[g] = WC_CCS_SET_CS96W(f); 378 break; 379 case WC_C_CS942 | 0x80: 380 st->design[g] = WC_CCS_SET_CS942(f); 381 break; 382 case WC_C_CSWSR: 383 if (f == WC_F_ISO_BASE) 384 st->state = WC_ISO_NOSTATE; 385 else 386 st->state = WC_ISO_CSWSR; 387 break; 388 case WC_C_CSWOSR: 389 st->state = WC_ISO_CSWOSR; 390 break; 391 } 392 *ptr = p - 1; 393 return 1; 394 } 395 396 void 397 wc_push_to_iso2022(Str os, wc_wchar_t cc, wc_status *st) 398 { 399 wc_uchar g = 0; 400 wc_bool is_wide = WC_FALSE, retry = WC_FALSE; 401 wc_wchar_t cc2; 402 403 while (1) { 404 switch (WC_CCS_TYPE(cc.ccs)) { 405 case WC_CCS_A_CS94: 406 if (cc.ccs == WC_CCS_US_ASCII) 407 cc.ccs = st->g0_ccs; 408 g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; 409 break; 410 case WC_CCS_A_CS94W: 411 is_wide = 1; 412 switch (cc.ccs) { 413 #ifdef USE_UNICODE 414 case WC_CCS_JIS_X_0212: 415 if (!WcOption.use_jisx0212 && WcOption.use_jisx0213 && 416 WcOption.ucs_conv) { 417 cc2 = wc_jisx0212_to_jisx0213(cc); 418 if (cc2.ccs == WC_CCS_JIS_X_0213_1 || 419 cc2.ccs == WC_CCS_JIS_X_0213_2) { 420 cc = cc2; 421 continue; 422 } 423 } 424 break; 425 case WC_CCS_JIS_X_0213_1: 426 case WC_CCS_JIS_X_0213_2: 427 if (!WcOption.use_jisx0213 && WcOption.use_jisx0212 && 428 WcOption.ucs_conv) { 429 cc2 = wc_jisx0213_to_jisx0212(cc); 430 if (cc2.ccs == WC_CCS_JIS_X_0212) { 431 cc = cc2; 432 continue; 433 } 434 } 435 break; 436 #endif 437 } 438 g = cs94w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; 439 break; 440 case WC_CCS_A_CS96: 441 g = cs96_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; 442 break; 443 case WC_CCS_A_CS96W: 444 is_wide = 1; 445 g = cs96w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; 446 break; 447 case WC_CCS_A_CS942: 448 g = cs942_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; 449 break; 450 case WC_CCS_A_UNKNOWN_W: 451 if (WcOption.no_replace) 452 return; 453 is_wide = 1; 454 cc.ccs = WC_CCS_US_ASCII; 455 g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; 456 cc.code = ((wc_uint32)WC_REPLACE_W[0] << 8) | WC_REPLACE_W[1]; 457 break; 458 case WC_CCS_A_UNKNOWN: 459 if (WcOption.no_replace) 460 return; 461 cc.ccs = WC_CCS_US_ASCII; 462 g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; 463 cc.code = (wc_uint32)WC_REPLACE[0]; 464 break; 465 default: 466 if ((cc.ccs == WC_CCS_JOHAB || WC_CCS_JOHAB_1 || 467 cc.ccs == WC_CCS_JOHAB_2 || cc.ccs == WC_CCS_JOHAB_3) && 468 cs94w_gmap[WC_F_KS_X_1001 - WC_F_ISO_BASE]) { 469 wc_wchar_t cc2 = wc_johab_to_ksx1001(cc); 470 if (cc2.ccs == WC_CCS_KS_X_1001) { 471 cc = cc2; 472 continue; 473 } 474 } 475 #ifdef USE_UNICODE 476 if (WcOption.ucs_conv) 477 cc = wc_any_to_iso2022(cc, st); 478 else 479 #endif 480 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 481 continue; 482 } 483 if (! g) { 484 #ifdef USE_UNICODE 485 if (WcOption.ucs_conv && ! retry) 486 cc = wc_any_to_any_ces(cc, st); 487 else 488 #endif 489 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 490 retry = WC_TRUE; 491 continue; 492 } 493 494 wc_push_iso2022_esc(os, cc.ccs, g, 1, st); 495 if (is_wide) 496 Strcat_char(os, (char)((cc.code >> 8) & 0x7f)); 497 Strcat_char(os, (char)(cc.code & 0x7f)); 498 return; 499 } 500 } 501 502 void 503 wc_push_to_iso2022_end(Str os, wc_status *st) 504 { 505 if (st->design[1] != 0 && st->design[1] != st->g1_ccs) 506 wc_push_iso2022_esc(os, st->g1_ccs, WC_C_G1_CS94, 0, st); 507 wc_push_iso2022_esc(os, st->g0_ccs, WC_C_G0_CS94, 1, st); 508 } 509 510 void 511 wc_push_iso2022_esc(Str os, wc_ccs ccs, wc_uchar g, wc_uint8 invoke, wc_status *st) 512 { 513 wc_uint8 g_invoke = g & 0x03; 514 515 if (st->design[g_invoke] != ccs) { 516 Strcat_char(os, WC_C_ESC); 517 if (WC_CCS_IS_WIDE(ccs)) { 518 Strcat_char(os, WC_C_MBCS); 519 if (g_invoke != 0 || 520 (ccs != WC_CCS_JIS_C_6226 && 521 ccs != WC_CCS_JIS_X_0208 && 522 ccs != WC_CCS_GB_2312)) 523 Strcat_char(os, (char)g); 524 } else { 525 Strcat_char(os, (char)g); 526 if ((ccs & WC_CCS_A_ISO_2022) == WC_CCS_A_CS942) 527 Strcat_char(os, WC_C_CS942); 528 } 529 Strcat_char(os, (char)WC_CCS_GET_F(ccs)); 530 st->design[g_invoke] = ccs; 531 } 532 if (! invoke) 533 return; 534 535 switch (g_invoke) { 536 case 0: 537 if (st->gl != 0) { 538 Strcat_char(os, WC_C_SI); 539 st->gl = 0; 540 } 541 break; 542 case 1: 543 if (st->gl != 1) { 544 Strcat_char(os, WC_C_SO); 545 st->gl = 1; 546 } 547 break; 548 case 2: 549 Strcat_char(os, WC_C_ESC); 550 Strcat_char(os, WC_C_SS2); 551 break; 552 case 3: 553 Strcat_char(os, WC_C_ESC); 554 Strcat_char(os, WC_C_SS3); 555 break; 556 } 557 } 558 559 void 560 wc_push_to_euc(Str os, wc_wchar_t cc, wc_status *st) 561 { 562 wc_ccs g1_ccs = st->ces_info->gset[1].ccs; 563 564 while (1) { 565 if (cc.ccs == g1_ccs) { 566 Strcat_char(os, (char)((cc.code >> 8) | 0x80)); 567 Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); 568 return; 569 } 570 switch (cc.ccs) { 571 case WC_CCS_US_ASCII: 572 Strcat_char(os, (char)cc.code); 573 return; 574 case WC_CCS_C1: 575 Strcat_char(os, (char)(cc.code | 0x80)); 576 return; 577 case WC_CCS_UNKNOWN_W: 578 if (!WcOption.no_replace) 579 Strcat_charp(os, WC_REPLACE_W); 580 return; 581 case WC_CCS_UNKNOWN: 582 if (!WcOption.no_replace) 583 Strcat_charp(os, WC_REPLACE); 584 return; 585 case WC_CCS_JOHAB: 586 case WC_CCS_JOHAB_1: 587 case WC_CCS_JOHAB_2: 588 case WC_CCS_JOHAB_3: 589 if (st->ces_info->id == WC_CES_EUC_KR) { 590 cc = wc_johab_to_ksx1001(cc); 591 continue; 592 } 593 default: 594 #ifdef USE_UNICODE 595 if (WcOption.ucs_conv) 596 cc = wc_any_to_any_ces(cc, st); 597 else 598 #endif 599 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 600 continue; 601 } 602 } 603 } 604 605 void 606 wc_push_to_eucjp(Str os, wc_wchar_t cc, wc_status *st) 607 { 608 while (1) { 609 switch (cc.ccs) { 610 case WC_CCS_US_ASCII: 611 Strcat_char(os, (char)cc.code); 612 return; 613 case WC_CCS_JIS_X_0201K: 614 if (WcOption.use_jisx0201k) { 615 Strcat_char(os, WC_C_SS2R); 616 Strcat_char(os, (char)(cc.code | 0x80)); 617 return; 618 } else if (WcOption.fix_width_conv) 619 cc.ccs = WC_CCS_UNKNOWN; 620 else 621 cc = wc_jisx0201k_to_jisx0208(cc); 622 continue; 623 case WC_CCS_JIS_X_0208: 624 break; 625 case WC_CCS_JIS_X_0213_1: 626 if (WcOption.use_jisx0213) 627 break; 628 #ifdef USE_UNICODE 629 else if (WcOption.ucs_conv && WcOption.use_jisx0212) 630 cc = wc_jisx0213_to_jisx0212(cc); 631 #endif 632 else 633 cc.ccs = WC_CCS_UNKNOWN_W; 634 continue; 635 case WC_CCS_JIS_X_0212: 636 if (WcOption.use_jisx0212) { 637 Strcat_char(os, WC_C_SS3R); 638 break; 639 } 640 #ifdef USE_UNICODE 641 else if (WcOption.ucs_conv && WcOption.use_jisx0213) 642 cc = wc_jisx0212_to_jisx0213(cc); 643 #endif 644 else 645 cc.ccs = WC_CCS_UNKNOWN_W; 646 continue; 647 case WC_CCS_JIS_X_0213_2: 648 if (WcOption.use_jisx0213) { 649 Strcat_char(os, WC_C_SS3R); 650 break; 651 } 652 #ifdef USE_UNICODE 653 else if (WcOption.ucs_conv && WcOption.use_jisx0212) 654 cc = wc_jisx0213_to_jisx0212(cc); 655 #endif 656 else 657 cc.ccs = WC_CCS_UNKNOWN_W; 658 continue; 659 case WC_CCS_C1: 660 Strcat_char(os, (char)(cc.code | 0x80)); 661 return; 662 case WC_CCS_UNKNOWN_W: 663 if (!WcOption.no_replace) 664 Strcat_charp(os, WC_REPLACE_W); 665 return; 666 case WC_CCS_UNKNOWN: 667 if (!WcOption.no_replace) 668 Strcat_charp(os, WC_REPLACE); 669 return; 670 default: 671 #ifdef USE_UNICODE 672 if (WcOption.ucs_conv) 673 cc = wc_any_to_any_ces(cc, st); 674 else 675 #endif 676 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 677 continue; 678 } 679 Strcat_char(os, (char)((cc.code >> 8) | 0x80)); 680 Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); 681 return; 682 } 683 } 684 685 void 686 wc_push_to_euctw(Str os, wc_wchar_t cc, wc_status *st) 687 { 688 while (1) { 689 switch (cc.ccs) { 690 case WC_CCS_US_ASCII: 691 Strcat_char(os, (char)cc.code); 692 return; 693 case WC_CCS_CNS_11643_1: 694 break; 695 case WC_CCS_CNS_11643_2: 696 case WC_CCS_CNS_11643_3: 697 case WC_CCS_CNS_11643_4: 698 case WC_CCS_CNS_11643_5: 699 case WC_CCS_CNS_11643_6: 700 case WC_CCS_CNS_11643_7: 701 Strcat_char(os, WC_C_SS2R); 702 Strcat_char(os, (char)(0xA1 + (cc.ccs - WC_CCS_CNS_11643_1))); 703 break; 704 case WC_CCS_CNS_11643_8: 705 case WC_CCS_CNS_11643_9: 706 case WC_CCS_CNS_11643_10: 707 case WC_CCS_CNS_11643_11: 708 case WC_CCS_CNS_11643_12: 709 case WC_CCS_CNS_11643_13: 710 case WC_CCS_CNS_11643_14: 711 case WC_CCS_CNS_11643_15: 712 case WC_CCS_CNS_11643_16: 713 Strcat_char(os, WC_C_SS2R); 714 Strcat_char(os, (char)(0xA8 + (cc.ccs - WC_CCS_CNS_11643_8))); 715 break; 716 case WC_CCS_C1: 717 Strcat_char(os, (char)(cc.code | 0x80)); 718 return; 719 case WC_CCS_UNKNOWN_W: 720 if (!WcOption.no_replace) 721 Strcat_charp(os, WC_REPLACE_W); 722 return; 723 case WC_CCS_UNKNOWN: 724 if (!WcOption.no_replace) 725 Strcat_charp(os, WC_REPLACE); 726 return; 727 default: 728 #ifdef USE_UNICODE 729 if (WcOption.ucs_conv) 730 cc = wc_any_to_any_ces(cc, st); 731 else 732 #endif 733 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 734 continue; 735 } 736 Strcat_char(os, (char)((cc.code >> 8) | 0x80)); 737 Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); 738 return; 739 } 740 } 741 742 void 743 wc_push_to_iso8859(Str os, wc_wchar_t cc, wc_status *st) 744 { 745 wc_ccs g1_ccs = st->ces_info->gset[1].ccs; 746 747 while (1) { 748 if (cc.ccs == g1_ccs) { 749 Strcat_char(os, (char)(cc.code | 0x80)); 750 return; 751 } 752 switch (cc.ccs) { 753 case WC_CCS_US_ASCII: 754 Strcat_char(os, (char)cc.code); 755 return; 756 case WC_CCS_C1: 757 Strcat_char(os, (char)(cc.code | 0x80)); 758 return; 759 case WC_CCS_UNKNOWN_W: 760 if (!WcOption.no_replace) 761 Strcat_charp(os, WC_REPLACE_W); 762 return; 763 case WC_CCS_UNKNOWN: 764 if (!WcOption.no_replace) 765 Strcat_charp(os, WC_REPLACE); 766 return; 767 default: 768 #ifdef USE_UNICODE 769 if (WcOption.ucs_conv) 770 cc = wc_any_to_any_ces(cc, st); 771 else 772 #endif 773 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 774 continue; 775 } 776 } 777 } 778 779 void 780 wc_create_gmap(wc_status *st) 781 { 782 wc_gset *gset = st->ces_info->gset; 783 wc_uchar *gset_ext = st->ces_info->gset_ext; 784 int i, f; 785 786 if (WcOption.strict_iso2022) { 787 for (i = 0; i < WC_F_ISO_BASE; i++) { 788 cs94_gmap[i] = 0; 789 cs96_gmap[i] = 0; 790 cs94w_gmap[i] = 0; 791 cs96w_gmap[i] = 0; 792 cs942_gmap[i] = 0; 793 } 794 } else { 795 for (i = 0; i < WC_F_ISO_BASE; i++) { 796 cs94_gmap[i] = gset_ext[0]; 797 cs96_gmap[i] = gset_ext[1]; 798 cs94w_gmap[i] = gset_ext[2]; 799 cs96w_gmap[i] = gset_ext[3]; 800 cs942_gmap[i] = gset_ext[0]; 801 } 802 } 803 for (i = 0; gset[i].ccs; i++) { 804 f = WC_CCS_GET_F(gset[i].ccs) - WC_F_ISO_BASE; 805 switch (WC_CCS_TYPE(gset[i].ccs)) { 806 case WC_CCS_A_CS94: 807 switch (gset[i].ccs) { 808 case WC_CCS_JIS_X_0201K: 809 if (!WcOption.use_jisx0201k) 810 continue; 811 break; 812 } 813 cs94_gmap[f] = gset[i].g; 814 break; 815 case WC_CCS_A_CS94W: 816 switch (gset[i].ccs) { 817 case WC_CCS_JIS_X_0212: 818 if (!WcOption.use_jisx0212) 819 continue; 820 break; 821 case WC_CCS_JIS_X_0213_1: 822 case WC_CCS_JIS_X_0213_2: 823 if (!WcOption.use_jisx0213) 824 continue; 825 break; 826 } 827 cs94w_gmap[f] = gset[i].g; 828 break; 829 case WC_CCS_A_CS96: 830 cs96_gmap[f] = gset[i].g; 831 break; 832 case WC_CCS_A_CS96W: 833 cs96w_gmap[f] = gset[i].g; 834 break; 835 case WC_CCS_A_CS942: 836 cs942_gmap[f] = gset[i].g; 837 break; 838 } 839 } 840 } 841 842 Str 843 wc_char_conv_from_iso2022(wc_uchar c, wc_status *st) 844 { 845 static Str os; 846 static wc_uchar buf[4]; 847 static size_t nbuf; 848 wc_uchar *p; 849 wc_ccs gl_ccs, gr_ccs; 850 851 if (st->state == -1) { 852 st->state = WC_ISO_NOSTATE; 853 os = Strnew_size(8); 854 nbuf = 0; 855 } 856 857 gl_ccs = st->ss ? st->design[st->ss] : st->design[st->gl]; 858 gr_ccs = st->ss ? st->design[st->ss] : st->design[st->gr]; 859 860 switch (st->state) { 861 case WC_ISO_NOSTATE: 862 switch (WC_ISO_MAP[c]) { 863 case GL2: 864 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) { 865 Strcat_char(os, (char)c); 866 break; 867 } 868 case GL: 869 if (WC_CCS_IS_WIDE(gl_ccs)) { 870 buf[nbuf++] = c; 871 st->state = WC_ISO_MBYTE1; 872 return NULL; 873 } else if (gl_ccs == WC_CES_US_ASCII) 874 Strcat_char(os, (char)c); 875 else 876 wtf_push_iso2022(os, gl_ccs, (wc_uint32)c); 877 break; 878 case GR2: 879 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) 880 break; 881 case GR: 882 if (WC_CCS_IS_WIDE(gr_ccs)) { 883 buf[nbuf++] = c; 884 st->state = WC_EUC_MBYTE1; 885 return NULL; 886 } else if (gr_ccs) 887 wtf_push_iso2022(os, gr_ccs, (wc_uint32)c); 888 break; 889 case C0: 890 Strcat_char(os, (char)c); 891 break; 892 case C1: 893 break; 894 case ESC: 895 buf[nbuf++] = c; 896 st->state = WC_C_ESC; 897 return NULL; 898 case SI: 899 st->gl = 0; 900 break; 901 case SO: 902 st->gl = 1; 903 break; 904 case SS2: 905 if (! st->design[2]) 906 return os; 907 st->ss = 2; 908 return NULL; 909 case SS3: 910 if (! st->design[3]) 911 return os; 912 st->ss = 3; 913 return NULL; 914 } 915 break; 916 case WC_ISO_MBYTE1: 917 switch (WC_ISO_MAP[c]) { 918 case GL2: 919 if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) 920 break; 921 case GL: 922 buf[nbuf++] = c; 923 wtf_push_iso2022(os, gl_ccs, ((wc_uint32)buf[0] << 8) | buf[1]); 924 break; 925 } 926 st->state = WC_ISO_NOSTATE; 927 break; 928 case WC_EUC_MBYTE1: 929 switch (WC_ISO_MAP[c]) { 930 case GR2: 931 if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) 932 break; 933 case GR: 934 if (gr_ccs == WC_CCS_CNS_11643_X) { 935 buf[nbuf++] = c; 936 st->state = WC_EUC_TW_MBYTE2; 937 return NULL; 938 } 939 buf[nbuf++] = c; 940 wtf_push_iso2022(os, gr_ccs, ((wc_uint32)buf[0] << 8) | buf[1]); 941 break; 942 } 943 st->state = WC_ISO_NOSTATE; 944 break; 945 case WC_EUC_TW_MBYTE2: 946 if (WC_ISO_MAP[c] == GR) { 947 buf[nbuf++] = c; 948 c = buf[0]; 949 if (0xa1 <= c && c <= 0xa7) { 950 wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (c - 0xa1), 951 ((wc_uint32)buf[1] << 8) | buf[2]); 952 break; 953 } 954 if (0xa8 <= c && c <= 0xb0) { 955 wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (c - 0xa8), 956 ((wc_uint32)buf[1] << 8) | buf[2]); 957 break; 958 } 959 } 960 st->state = WC_ISO_NOSTATE; 961 break; 962 case WC_C_ESC: 963 switch (c) { 964 case WC_C_G0_CS94: 965 case WC_C_G1_CS94: 966 case WC_C_G2_CS94: 967 case WC_C_G3_CS94: 968 buf[nbuf++] = c; 969 st->state = WC_C_G0_CS94; 970 return NULL; 971 case WC_C_G0_CS96: 972 case WC_C_G1_CS96: 973 case WC_C_G2_CS96: 974 case WC_C_G3_CS96: 975 case WC_C_C0: 976 case WC_C_C1: 977 case WC_C_REP: 978 buf[nbuf++] = c; 979 st->state = WC_C_G0_CS96; 980 return NULL; 981 case WC_C_MBCS: 982 case WC_C_CSWSR: 983 buf[nbuf++] = c; 984 st->state = c; 985 return NULL; 986 case WC_C_SS2: 987 st->ss = 2; 988 st->state = WC_ISO_NOSTATE; 989 return NULL; 990 case WC_C_SS3: 991 st->ss = 3; 992 st->state = WC_ISO_NOSTATE; 993 return NULL; 994 case WC_C_LS2: 995 st->gl = 2; 996 break; 997 case WC_C_LS3: 998 st->gl = 3; 999 break; 1000 case WC_C_LS2R: 1001 st->gr = 2; 1002 break; 1003 case WC_C_LS3R: 1004 st->gr = 3; 1005 break; 1006 default: 1007 break; 1008 } 1009 break; 1010 case WC_C_MBCS: 1011 switch (c) { 1012 case WC_F_JIS_C_6226: 1013 case WC_F_JIS_X_0208: 1014 case WC_F_GB_2312: 1015 buf[nbuf++] = c; 1016 p = buf; 1017 wc_parse_iso2022_esc(&p, st); 1018 break; 1019 case WC_C_G0_CS94: 1020 case WC_C_G1_CS94: 1021 case WC_C_G2_CS94: 1022 case WC_C_G3_CS94: 1023 case WC_C_G0_CS96: 1024 case WC_C_G1_CS96: 1025 case WC_C_G2_CS96: 1026 case WC_C_G3_CS96: 1027 buf[nbuf++] = c; 1028 st->state = WC_C_G0_CS96; 1029 return NULL; 1030 } 1031 break; 1032 case WC_C_CSWSR: 1033 switch (c) { 1034 case WC_C_CSWOSR: 1035 buf[nbuf++] = c; 1036 st->state = WC_C_G1_CS94; 1037 return NULL; 1038 } 1039 buf[nbuf++] = c; 1040 p = buf; 1041 wc_parse_iso2022_esc(&p, st); 1042 break; 1043 case WC_C_G0_CS94: 1044 switch (c) { 1045 case WC_C_CS942: 1046 buf[nbuf++] = c; 1047 st->state = WC_C_G0_CS96; 1048 return NULL; 1049 } 1050 case WC_C_G0_CS96: 1051 buf[nbuf++] = c; 1052 p = buf; 1053 wc_parse_iso2022_esc(&p, st); 1054 break; 1055 } 1056 st->ss = 0; 1057 st->state = -1; 1058 return os; 1059 }