sjis.c (12999B)
1 2 #include "wc.h" 3 #include "sjis.h" 4 #include "jis.h" 5 #include "wtf.h" 6 #include "ucs.h" 7 8 #include "map/jisx02132_sjis.map" 9 wc_uchar *wc_jisx0212_jisx02132_map = jisx02132_sjis_map; 10 11 #define C0 WC_SJIS_MAP_C0 12 #define GL WC_SJIS_MAP_GL 13 #define LB WC_SJIS_MAP_LB 14 #define S80 WC_SJIS_MAP_80 15 #define SK WC_SJIS_MAP_SK 16 #define SL WC_SJIS_MAP_SL 17 #define SH WC_SJIS_MAP_SH 18 #define SX WC_SJIS_MAP_SX 19 #define C1 WC_SJIS_MAP_C1 20 #define SA0 WC_SJIS_MAP_A0 21 22 wc_uint8 WC_SJIS_MAP[ 0x100 ] = { 23 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 24 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 25 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 26 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 27 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 28 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 29 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 30 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0, 31 32 S80,SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, 33 SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, 34 SA0,SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, 35 SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, 36 SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, 37 SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, 38 SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, 39 SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, C1, C1, C1, 40 }; 41 42 #define sjis_to_jisx0208(ub, lb) \ 43 { \ 44 ub -= (ub < 0xa0) ? 0x81 : 0xc1; \ 45 ub = (ub << 1) + 0x21; \ 46 if (lb < 0x9f) { \ 47 lb -= (lb > 0x7e) ? 0x20 : 0x1f; \ 48 } else { \ 49 ub++; \ 50 lb -= 0x7e; \ 51 } \ 52 } 53 #define sjis_to_jisx02132(ub, lb) \ 54 { \ 55 if (lb < 0x9f) { \ 56 ub = sjis1_jisx02132_map[ub - 0xf0]; \ 57 lb -= (lb > 0x7e) ? 0x20 : 0x1f; \ 58 } else { \ 59 ub = sjis2_jisx02132_map[ub - 0xf0]; \ 60 lb -= 0x7e; \ 61 } \ 62 } 63 #define jisx0208_to_sjis(ub, lb) \ 64 { \ 65 lb += (ub & 1) ? 0x1f : 0x7d; \ 66 if (lb > 0x7e) \ 67 lb++; \ 68 ub = (ub - 0x21) >> 1; \ 69 ub += (ub < 0x1f) ? 0x81 : 0xc1; \ 70 } 71 #define jisx02132_to_sjis(ub, lb) \ 72 { \ 73 lb += (ub & 1) ? 0x1f : 0x7d; \ 74 if (lb > 0x7e) \ 75 lb++; \ 76 ub = jisx02132_sjis_map[ ub ]; \ 77 } 78 79 wc_wchar_t 80 wc_sjis_to_jis(wc_wchar_t cc) 81 { 82 wc_uchar ub, lb; 83 84 ub = cc.code >> 8; 85 lb = cc.code & 0xff; 86 if (ub < 0xf0) { 87 sjis_to_jisx0208(ub, lb); 88 cc.ccs = WC_CCS_JIS_X_0208; 89 } else { 90 sjis_to_jisx02132(ub, lb); 91 cc.ccs = WC_CCS_JIS_X_0213_2; 92 } 93 cc.code = ((wc_uint32)ub << 8) | lb; 94 return cc; 95 } 96 97 wc_wchar_t 98 wc_jis_to_sjis(wc_wchar_t cc) 99 { 100 wc_uchar ub, lb; 101 102 ub = (cc.code >> 8) & 0x7f; 103 lb = cc.code & 0x7f; 104 if (cc.ccs == WC_CCS_JIS_X_0213_2) { 105 jisx02132_to_sjis(ub, lb); 106 if (! ub) { 107 cc.ccs = WC_CCS_UNKNOWN_W; 108 return cc; 109 } 110 } else { 111 jisx0208_to_sjis(ub, lb); 112 } 113 cc.code = ((wc_uint32)ub << 8) | lb; 114 return cc; 115 } 116 117 wc_wchar_t 118 wc_sjis_ext_to_cs94w(wc_wchar_t cc) 119 { 120 wc_uchar ub, lb; 121 122 ub = cc.code >> 8; 123 lb = cc.code & 0xff; 124 sjis_to_jisx0208(ub, lb); 125 if (ub <= 0x7e) { 126 cc.ccs = WC_CCS_SJIS_EXT_1; 127 } else { 128 ub -= 0x5e; 129 cc.ccs = WC_CCS_SJIS_EXT_2; 130 } 131 cc.code = ((wc_uint32)ub << 8) | lb; 132 return cc; 133 } 134 135 wc_wchar_t 136 wc_cs94w_to_sjis_ext(wc_wchar_t cc) 137 { 138 wc_uchar ub, lb; 139 140 ub = (cc.code >> 8) & 0x7f; 141 lb = cc.code & 0x7f; 142 if (cc.ccs == WC_CCS_SJIS_EXT_2) 143 ub += 0x5e; 144 jisx0208_to_sjis(ub, lb); 145 cc.ccs = WC_CCS_SJIS_EXT; 146 cc.code = ((wc_uint32)ub << 8) | lb; 147 return cc; 148 } 149 150 wc_uint32 151 wc_sjis_ext1_to_N(wc_uint32 c) 152 { 153 wc_uchar ub; 154 155 ub = (c >> 8) & 0x7f; 156 switch(ub) { 157 case 0x2D: /* 0x8740 - */ 158 ub = 0; 159 break; 160 case 0x79: /* 0xED40 - */ 161 case 0x7A: /* 0xED9F - */ 162 case 0x7B: /* 0xEE40 - */ 163 case 0x7C: /* 0xEE9F - */ 164 ub -= 0x78; 165 break; 166 default: 167 return WC_C_SJIS_ERROR; 168 } 169 return ub * 0x5e + (c & 0x7f) - 0x21; 170 } 171 172 wc_uint32 173 wc_sjis_ext2_to_N(wc_uint32 c) 174 { 175 wc_uchar ub; 176 177 ub = (c >> 8) & 0x7f; 178 switch(ub) { 179 case 0x35: /* 0xFA40 - */ 180 case 0x36: /* 0xFA9F - */ 181 case 0x37: /* 0xFB40 - */ 182 case 0x38: /* 0xFB9F - */ 183 case 0x39: /* 0xFC40 - */ 184 ub -= 0x30; 185 break; 186 default: 187 return WC_C_SJIS_ERROR; 188 } 189 return ub * 0x5e + (c & 0x7f) - 0x21; 190 } 191 192 Str 193 wc_conv_from_sjis(Str is, wc_ces ces) 194 { 195 Str os; 196 wc_uchar *sp = (wc_uchar *)is->ptr; 197 wc_uchar *ep = sp + is->length; 198 wc_uchar *p; 199 wc_uchar jis[2]; 200 int state = WC_SJIS_NOSTATE; 201 wc_wchar_t cc; 202 203 for (p = sp; p < ep && *p < 0x80; p++) 204 ; 205 if (p == ep) 206 return is; 207 os = Strnew_size(is->length); 208 if (p > sp) 209 Strcat_charp_n(os, is->ptr, (int)(p - sp)); 210 211 for (; p < ep; p++) { 212 switch (state) { 213 case WC_SJIS_NOSTATE: 214 switch (WC_SJIS_MAP[*p]) { 215 case SL: 216 state = WC_SJIS_SHIFT_L; 217 break; 218 case SH: 219 state = WC_SJIS_SHIFT_H; 220 break; 221 case SX: 222 state = WC_SJIS_SHIFT_X; 223 break; 224 case SK: 225 wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p); 226 break; 227 case S80: 228 case SA0: 229 case C1: 230 wtf_push_unknown(os, p, 1); 231 break; 232 default: 233 Strcat_char(os, (char)*p); 234 break; 235 } 236 break; 237 case WC_SJIS_SHIFT_L: 238 case WC_SJIS_SHIFT_H: 239 if (WC_SJIS_MAP[*p] & LB) { 240 jis[0] = *(p-1); 241 jis[1] = *p; 242 sjis_to_jisx0208(jis[0], jis[1]); 243 cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; 244 cc.ccs = wc_jisx0208_or_jisx02131(cc.code); 245 if (cc.ccs == WC_CCS_JIS_X_0208) 246 wtf_push(os, cc.ccs, cc.code); 247 else 248 wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p); 249 } else 250 wtf_push_unknown(os, p-1, 2); 251 state = WC_SJIS_NOSTATE; 252 break; 253 case WC_SJIS_SHIFT_X: 254 if (WC_SJIS_MAP[*p] & LB) 255 wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p); 256 else 257 wtf_push_unknown(os, p-1, 2); 258 state = WC_SJIS_NOSTATE; 259 break; 260 } 261 } 262 switch (state) { 263 case WC_SJIS_SHIFT_L: 264 case WC_SJIS_SHIFT_H: 265 case WC_SJIS_SHIFT_X: 266 wtf_push_unknown(os, p-1, 1); 267 break; 268 } 269 return os; 270 } 271 272 Str 273 wc_conv_from_sjisx0213(Str is, wc_ces ces) 274 { 275 Str os; 276 wc_uchar *sp = (wc_uchar *)is->ptr; 277 wc_uchar *ep = sp + is->length; 278 wc_uchar *p; 279 wc_uchar jis[2]; 280 int state = WC_SJIS_NOSTATE; 281 wc_wchar_t cc; 282 283 for (p = sp; p < ep && *p < 0x80; p++) 284 ; 285 if (p == ep) 286 return is; 287 os = Strnew_size(is->length); 288 if (p > sp) 289 Strcat_charp_n(os, is->ptr, (int)(p - sp)); 290 291 for (; p < ep; p++) { 292 switch (state) { 293 case WC_SJIS_NOSTATE: 294 switch (WC_SJIS_MAP[*p]) { 295 case SL: 296 state = WC_SJIS_SHIFT_L; 297 break; 298 case SH: 299 state = WC_SJIS_SHIFT_H; 300 break; 301 case SX: 302 state = WC_SJIS_SHIFT_X; 303 break; 304 case SK: 305 wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p); 306 break; 307 case S80: 308 case SA0: 309 case C1: 310 wtf_push_unknown(os, p, 1); 311 break; 312 default: 313 Strcat_char(os, (char)*p); 314 break; 315 } 316 break; 317 case WC_SJIS_SHIFT_L: 318 case WC_SJIS_SHIFT_H: 319 if (WC_SJIS_MAP[*p] & LB) { 320 jis[0] = *(p-1); 321 jis[1] = *p; 322 sjis_to_jisx0208(jis[0], jis[1]); 323 cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; 324 cc.ccs = wc_jisx0208_or_jisx02131(cc.code); 325 wtf_push(os, cc.ccs, cc.code); 326 } else 327 wtf_push_unknown(os, p-1, 2); 328 state = WC_SJIS_NOSTATE; 329 break; 330 case WC_SJIS_SHIFT_X: 331 if (WC_SJIS_MAP[*p] & LB) { 332 jis[0] = *(p-1); 333 jis[1] = *p; 334 sjis_to_jisx02132(jis[0], jis[1]); 335 wtf_push(os, WC_CCS_JIS_X_0213_2, ((wc_uint32)jis[0] << 8) | jis[1]); 336 } else 337 wtf_push_unknown(os, p-1, 2); 338 state = WC_SJIS_NOSTATE; 339 break; 340 } 341 } 342 switch (state) { 343 case WC_SJIS_SHIFT_L: 344 case WC_SJIS_SHIFT_H: 345 case WC_SJIS_SHIFT_X: 346 wtf_push_unknown(os, p-1, 1); 347 break; 348 } 349 return os; 350 } 351 352 void 353 wc_push_to_sjis(Str os, wc_wchar_t cc, wc_status *st) 354 { 355 wc_uchar ub, lb; 356 357 while (1) { 358 switch (cc.ccs) { 359 case WC_CCS_US_ASCII: 360 Strcat_char(os, cc.code); 361 return; 362 case WC_CCS_JIS_X_0201K: 363 if (WcOption.use_jisx0201k) { 364 Strcat_char(os, cc.code | 0x80); 365 return; 366 } else if (WcOption.fix_width_conv) 367 cc.ccs = WC_CCS_UNKNOWN; 368 else 369 cc = wc_jisx0201k_to_jisx0208(cc); 370 continue; 371 case WC_CCS_JIS_X_0208: 372 ub = (cc.code >> 8) & 0x7f; 373 lb = cc.code & 0x7f; 374 jisx0208_to_sjis(ub, lb); 375 Strcat_char(os, ub); 376 Strcat_char(os, lb); 377 return; 378 case WC_CCS_SJIS_EXT_1: 379 case WC_CCS_SJIS_EXT_2: 380 cc = wc_cs94w_to_sjis_ext(cc); 381 case WC_CCS_SJIS_EXT: 382 Strcat_char(os, (char)(cc.code >> 8)); 383 Strcat_char(os, (char)(cc.code & 0xff)); 384 return; 385 case WC_CCS_UNKNOWN_W: 386 if (!WcOption.no_replace) 387 Strcat_charp(os, WC_REPLACE_W); 388 return; 389 case WC_CCS_UNKNOWN: 390 if (!WcOption.no_replace) 391 Strcat_charp(os, WC_REPLACE); 392 return; 393 default: 394 #ifdef USE_UNICODE 395 if (WcOption.ucs_conv) 396 cc = wc_any_to_any_ces(cc, st); 397 else 398 #endif 399 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 400 continue; 401 } 402 } 403 } 404 405 void 406 wc_push_to_sjisx0213(Str os, wc_wchar_t cc, wc_status *st) 407 { 408 wc_uchar ub, lb; 409 410 while (1) { 411 switch (cc.ccs) { 412 case WC_CCS_US_ASCII: 413 Strcat_char(os, cc.code); 414 return; 415 case WC_CCS_JIS_X_0201K: 416 if (WcOption.use_jisx0201k) { 417 Strcat_char(os, cc.code | 0x80); 418 return; 419 } else if (WcOption.fix_width_conv) 420 cc.ccs = WC_CCS_UNKNOWN; 421 else 422 cc = wc_jisx0201k_to_jisx0208(cc); 423 continue; 424 case WC_CCS_JIS_X_0213_1: 425 if (! WcOption.use_jisx0213) { 426 cc.ccs = WC_CCS_UNKNOWN_W; 427 continue; 428 } 429 case WC_CCS_JIS_X_0208: 430 ub = (cc.code >> 8) & 0x7f; 431 lb = cc.code & 0x7f; 432 jisx0208_to_sjis(ub, lb); 433 Strcat_char(os, ub); 434 Strcat_char(os, lb); 435 return; 436 case WC_CCS_JIS_X_0213_2: 437 if (! WcOption.use_jisx0213) { 438 cc.ccs = WC_CCS_UNKNOWN_W; 439 continue; 440 } 441 ub = (cc.code >> 8) & 0x7f; 442 lb = cc.code & 0x7f; 443 jisx02132_to_sjis(ub, lb); 444 if (ub) { 445 Strcat_char(os, ub); 446 Strcat_char(os, lb); 447 return; 448 } 449 case WC_CCS_UNKNOWN_W: 450 if (!WcOption.no_replace) 451 Strcat_charp(os, WC_REPLACE_W); 452 return; 453 case WC_CCS_UNKNOWN: 454 if (!WcOption.no_replace) 455 Strcat_charp(os, WC_REPLACE); 456 return; 457 default: 458 #ifdef USE_UNICODE 459 if (WcOption.ucs_conv) 460 cc = wc_any_to_any_ces(cc, st); 461 else 462 #endif 463 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 464 continue; 465 } 466 } 467 } 468 469 Str 470 wc_char_conv_from_sjis(wc_uchar c, wc_status *st) 471 { 472 static Str os; 473 static wc_uchar jis[2]; 474 wc_wchar_t cc; 475 476 if (st->state == -1) { 477 st->state = WC_SJIS_NOSTATE; 478 os = Strnew_size(8); 479 } 480 481 switch (st->state) { 482 case WC_SJIS_NOSTATE: 483 switch (WC_SJIS_MAP[c]) { 484 case SL: 485 jis[0] = c; 486 st->state = WC_SJIS_SHIFT_L; 487 return NULL; 488 case SH: 489 jis[0] = c; 490 st->state = WC_SJIS_SHIFT_H; 491 return NULL; 492 case SX: 493 jis[0] = c; 494 st->state = WC_SJIS_SHIFT_X; 495 return NULL; 496 case SK: 497 wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c); 498 break; 499 case S80: 500 case SA0: 501 case C1: 502 break; 503 default: 504 Strcat_char(os, (char)c); 505 break; 506 } 507 break; 508 case WC_SJIS_SHIFT_L: 509 case WC_SJIS_SHIFT_H: 510 if (WC_SJIS_MAP[c] & LB) { 511 jis[1] = c; 512 sjis_to_jisx0208(jis[0], jis[1]); 513 cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; 514 cc.ccs = wc_jisx0208_or_jisx02131(cc.code); 515 if (cc.ccs == WC_CCS_JIS_X_0208) 516 wtf_push(os, cc.ccs, cc.code); 517 else 518 wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]); 519 } 520 st->state = WC_SJIS_NOSTATE; 521 break; 522 case WC_SJIS_SHIFT_X: 523 if (WC_SJIS_MAP[c] & LB) { 524 jis[1] = c; 525 wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]); 526 } 527 st->state = WC_SJIS_NOSTATE; 528 break; 529 } 530 st->state = -1; 531 return os; 532 } 533 534 Str 535 wc_char_conv_from_sjisx0213(wc_uchar c, wc_status *st) 536 { 537 static Str os; 538 static wc_uchar jis[2]; 539 wc_wchar_t cc; 540 541 if (st->state == -1) { 542 st->state = WC_SJIS_NOSTATE; 543 os = Strnew_size(8); 544 } 545 546 switch (st->state) { 547 case WC_SJIS_NOSTATE: 548 switch (WC_SJIS_MAP[c]) { 549 case SL: 550 jis[0] = c; 551 st->state = WC_SJIS_SHIFT_L; 552 return NULL; 553 case SH: 554 jis[0] = c; 555 st->state = WC_SJIS_SHIFT_H; 556 return NULL; 557 case SX: 558 jis[0] = c; 559 st->state = WC_SJIS_SHIFT_X; 560 return NULL; 561 case SK: 562 wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c); 563 break; 564 case S80: 565 case SA0: 566 case C1: 567 break; 568 default: 569 Strcat_char(os, (char)c); 570 break; 571 } 572 break; 573 case WC_SJIS_SHIFT_L: 574 case WC_SJIS_SHIFT_H: 575 if (WC_SJIS_MAP[c] & LB) { 576 jis[1] = c; 577 sjis_to_jisx0208(jis[0], jis[1]); 578 cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; 579 cc.ccs = wc_jisx0208_or_jisx02131(cc.code); 580 wtf_push(os, cc.ccs, cc.code); 581 } 582 st->state = WC_SJIS_NOSTATE; 583 break; 584 case WC_SJIS_SHIFT_X: 585 if (WC_SJIS_MAP[c] & LB) { 586 jis[1] = c; 587 sjis_to_jisx02132(jis[0], jis[1]); 588 wtf_push(os, WC_CCS_JIS_X_0213_2, ((wc_uint32)jis[0] << 8) | jis[1]); 589 } 590 st->state = WC_SJIS_NOSTATE; 591 break; 592 } 593 st->state = -1; 594 return os; 595 }