johab.c (9496B)
1 2 #include "wc.h" 3 #include "johab.h" 4 #include "wtf.h" 5 #ifdef USE_UNICODE 6 #include "ucs.h" 7 #endif 8 9 #define C0 WC_JOHAB_MAP_C0 10 #define GL WC_JOHAB_MAP_GL 11 #define C1 WC_JOHAB_MAP_C1 12 #define GH WC_JOHAB_MAP_GH 13 #define GB WC_JOHAB_MAP_GB 14 #define JJ WC_JOHAB_MAP_JJ 15 #define JB WC_JOHAB_MAP_JB 16 #define HB WC_JOHAB_MAP_HB 17 #define CJ WC_JOHAB_MAP_CJ 18 #define CB WC_JOHAB_MAP_CB 19 20 /* 21 00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF 22 C0 GL GL GL C0 - - J J - H - H - - 23 - - J B - - J J B B B B B B - 24 25 C0 GL GH GB C0 C1 CJ JJ JB CB HB CB HB CB C1 26 */ 27 28 wc_uint8 WC_JOHAB_MAP[ 0x100 ] = { 29 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 30 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 31 /* 20 */ 32 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 33 /* 30 31 */ 34 GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, 35 /* 40 41 */ 36 GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 37 GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 38 GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, 39 GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0, 40 41 /* 80 83 84 */ 42 C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, 43 /* 90 91 */ 44 JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 45 JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 46 JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 47 JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, 48 /* D3 D4 D7 D8 DF */ 49 JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB, 50 HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, 51 /* F9 FA FE FF */ 52 HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1, 53 }; 54 55 static wc_uint8 johab1_N_map[ 3 ][ 32 ] = { 56 { 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 57 15,16,17,18,19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 58 { 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9,10,11, 59 0, 0,12,13,14,15,16,17, 0, 0,18,19,20,21, 0, 0 }, 60 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 61 16,17, 0,18,19,20,21,22,23,24,25,26,27,28, 0, 0 } 62 }; 63 64 static wc_uint8 N_johab1_map[ 3 ][ 32 ] = { 65 { 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17, 66 18,19,20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 67 { 3, 4, 5, 6, 7,10,11,12,13,14,15,18,19,20,21,22, 68 23,26,27,28,29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 69 { 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, 70 17,19,20,21,22,23,24,25,26,27,28,29, 0, 0, 0, 0 } 71 }; 72 73 wc_wchar_t 74 wc_johab_to_ksx1001(wc_wchar_t cc) 75 { 76 #ifdef USE_UNICODE 77 static wc_table *t = NULL; 78 #endif 79 80 switch (cc.ccs) { 81 case WC_CCS_JOHAB: 82 return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc)); 83 case WC_CCS_JOHAB_1: 84 case WC_CCS_JOHAB_2: 85 #ifdef USE_UNICODE 86 if (WcOption.ucs_conv) { 87 if (t == NULL) 88 t = wc_get_ucs_table(WC_CCS_KS_X_1001); 89 cc = wc_any_to_any(cc, t); 90 } else 91 #endif 92 cc.ccs = WC_CCS_UNKNOWN_W; 93 break; 94 case WC_CCS_JOHAB_3: 95 if (cc.code >= 0x2121) 96 cc.ccs = WC_CCS_KS_X_1001; 97 else 98 cc.ccs = WC_CCS_UNKNOWN_W; 99 break; 100 } 101 return cc; 102 } 103 104 wc_wchar_t 105 wc_ksx1001_to_johab(wc_wchar_t cc) 106 { 107 cc.code &= 0x7f7f; 108 if ((cc.code >= 0x2121 && cc.code < 0x2421) || 109 (cc.code > 0x2453 && cc.code <= 0x2C7E) || 110 (cc.code >= 0x4A21 && cc.code <= 0x7D7E)) { 111 cc.ccs = WC_CCS_JOHAB_3; 112 return cc; 113 } 114 #ifdef USE_UNICODE 115 if (WcOption.ucs_conv) 116 cc = wc_ucs_to_johab(wc_any_to_ucs(cc)); 117 else 118 #endif 119 cc.ccs = WC_CCS_UNKNOWN_W; 120 return cc; 121 } 122 123 #ifdef USE_UNICODE 124 wc_wchar_t 125 wc_ucs_to_johab(wc_uint32 ucs) 126 { 127 wc_table *t; 128 wc_wchar_t cc; 129 130 if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) { 131 ucs -= WC_C_UCS2_HANGUL; 132 cc.code = WC_N_JOHAB1(ucs); 133 cc.ccs = WC_CCS_JOHAB; 134 } else if (ucs >= 0x3131 && ucs <= 0x3163) { 135 t = wc_get_ucs_table(WC_CCS_JOHAB_2); 136 cc = wc_ucs_to_any(ucs, t); 137 } else { 138 t = wc_get_ucs_table(WC_CCS_JOHAB_3); 139 cc = wc_ucs_to_any(ucs, t); 140 } 141 return cc; 142 } 143 #endif 144 145 wc_uint32 146 wc_johab1_to_N(wc_uint32 code) 147 { 148 wc_uint32 a, b, c; 149 150 a = johab1_N_map[0][(code >> 10) & 0x1F]; 151 b = johab1_N_map[1][(code >> 5) & 0x1F]; 152 c = johab1_N_map[2][ code & 0x1F]; 153 if (a && b && c) 154 return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1); 155 return WC_C_JOHAB_ERROR; 156 } 157 158 wc_uint32 159 wc_N_to_johab1(wc_uint32 code) 160 { 161 wc_uint32 a, b, c; 162 163 a = N_johab1_map[0][(code / 28) / 21]; 164 b = N_johab1_map[1][(code / 28) % 21]; 165 c = N_johab1_map[2][ code % 28 ]; 166 return 0x8000 | (a << 10) | (b << 5) | c; 167 } 168 169 /* 0x1F21 - 0x2C7E, 0x4A21 - 0x7C7E 170 (0x1F21 - 0x207E are not in KS X 1001) */ 171 #define johab3_to_ksx1001(ub, lb) \ 172 { \ 173 if (ub < 0xe0) { \ 174 ub = ((ub - 0xd8) << 1) + 0x1f; \ 175 } else { \ 176 ub = ((ub - 0xe0) << 1) + 0x4a; \ 177 } \ 178 if (lb < 0xa1) { \ 179 lb -= (lb < 0x91) ? 0x10 : 0x22; \ 180 } else { \ 181 ub++; \ 182 lb -= 0x80; \ 183 } \ 184 } 185 186 #define ksx1001_to_johab3(ub, lb) \ 187 { \ 188 if (ub < 0x4a) { \ 189 ub -= 0x1f; \ 190 lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \ 191 ub = (ub >> 1) + 0xd8; \ 192 } else { \ 193 ub -= 0x4a; \ 194 lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \ 195 ub = (ub >> 1) + 0xe0; \ 196 } \ 197 } 198 199 wc_wchar_t 200 wc_johab_to_cs128w(wc_wchar_t cc) 201 { 202 wc_uint32 n; 203 wc_uchar ub, lb; 204 205 if (cc.code < 0xD800) { 206 n = WC_JOHAB1_N(cc.code); 207 if (n != WC_C_JOHAB_ERROR) { 208 cc.code = WC_N_CS94x128(n); 209 cc.ccs = WC_CCS_JOHAB_1; 210 } else { 211 n = WC_JOHAB2_N(cc.code); 212 cc.code = WC_N_CS128W(n); 213 cc.ccs = WC_CCS_JOHAB_2; 214 } 215 } else { 216 ub = cc.code >> 8; 217 lb = cc.code & 0xff; 218 johab3_to_ksx1001(ub, lb); 219 cc.code = ((wc_uint32)ub << 8) | lb; 220 cc.ccs = WC_CCS_JOHAB_3; 221 } 222 return cc; 223 } 224 225 wc_wchar_t 226 wc_cs128w_to_johab(wc_wchar_t cc) 227 { 228 wc_uint32 n; 229 wc_uchar ub, lb; 230 231 switch (cc.ccs) { 232 case WC_CCS_JOHAB_1: 233 n = WC_CS94x128_N(cc.code); 234 cc.code = WC_N_JOHAB1(n); 235 break; 236 case WC_CCS_JOHAB_2: 237 n = WC_CS128W_N(cc.code); 238 cc.code = WC_N_JOHAB2(n); 239 break; 240 case WC_CCS_JOHAB_3: 241 ub = (cc.code >> 8) & 0x7f; 242 lb = cc.code & 0x7f; 243 ksx1001_to_johab3(ub, lb); 244 cc.code = ((wc_uint32)ub << 8) | lb; 245 } 246 cc.ccs = WC_CCS_JOHAB; 247 return cc; 248 } 249 250 Str 251 wc_conv_from_johab(Str is, wc_ces ces) 252 { 253 Str os; 254 wc_uchar *sp = (wc_uchar *)is->ptr; 255 wc_uchar *ep = sp + is->length; 256 wc_uchar *p; 257 int state = WC_JOHAB_NOSTATE; 258 259 for (p = sp; p < ep && *p < 0x80; p++) 260 ; 261 if (p == ep) 262 return is; 263 os = Strnew_size(is->length); 264 if (p > sp) 265 Strcat_charp_n(os, is->ptr, (int)(p - sp)); 266 267 for (; p < ep; p++) { 268 switch (state) { 269 case WC_JOHAB_NOSTATE: 270 switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) { 271 case WC_JOHAB_MAP_UJ: 272 state = WC_JOHAB_HANGUL1; 273 break; 274 case WC_JOHAB_MAP_UH: 275 state = WC_JOHAB_HANJA1; 276 break; 277 case WC_JOHAB_MAP_C1: 278 wtf_push_unknown(os, p, 1); 279 break; 280 default: 281 Strcat_char(os, (char)*p); 282 break; 283 } 284 break; 285 case WC_JOHAB_HANGUL1: 286 if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) 287 wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); 288 else 289 wtf_push_unknown(os, p-1, 2); 290 state = WC_JOHAB_NOSTATE; 291 break; 292 case WC_JOHAB_HANJA1: 293 if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH) 294 wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); 295 else 296 wtf_push_unknown(os, p-1, 2); 297 state = WC_JOHAB_NOSTATE; 298 break; 299 } 300 } 301 switch (state) { 302 case WC_JOHAB_HANGUL1: 303 case WC_JOHAB_HANJA1: 304 wtf_push_unknown(os, p-1, 1); 305 break; 306 } 307 return os; 308 } 309 310 void 311 wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st) 312 { 313 while (1) { 314 switch (cc.ccs) { 315 case WC_CCS_US_ASCII: 316 Strcat_char(os, (char)cc.code); 317 return; 318 case WC_CCS_JOHAB_1: 319 case WC_CCS_JOHAB_2: 320 case WC_CCS_JOHAB_3: 321 cc = wc_cs128w_to_johab(cc); 322 case WC_CCS_JOHAB: 323 Strcat_char(os, (char)(cc.code >> 8)); 324 Strcat_char(os, (char)(cc.code & 0xff)); 325 return; 326 case WC_CCS_KS_X_1001: 327 cc = wc_ksx1001_to_johab(cc); 328 continue; 329 case WC_CCS_UNKNOWN_W: 330 if (!WcOption.no_replace) 331 Strcat_charp(os, WC_REPLACE_W); 332 return; 333 case WC_CCS_UNKNOWN: 334 if (!WcOption.no_replace) 335 Strcat_charp(os, WC_REPLACE); 336 return; 337 default: 338 #ifdef USE_UNICODE 339 if (WcOption.ucs_conv) 340 cc = wc_any_to_any_ces(cc, st); 341 else 342 #endif 343 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 344 continue; 345 } 346 } 347 } 348 349 Str 350 wc_char_conv_from_johab(wc_uchar c, wc_status *st) 351 { 352 static Str os; 353 static wc_uchar johabu; 354 355 if (st->state == -1) { 356 st->state = WC_JOHAB_NOSTATE; 357 os = Strnew_size(8); 358 } 359 360 switch (st->state) { 361 case WC_JOHAB_NOSTATE: 362 switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) { 363 case WC_JOHAB_MAP_UJ: 364 johabu = c; 365 st->state = WC_JOHAB_HANGUL1; 366 return NULL; 367 case WC_JOHAB_MAP_UH: 368 johabu = c; 369 st->state = WC_JOHAB_HANJA1; 370 return NULL; 371 case WC_JOHAB_MAP_C1: 372 break; 373 default: 374 Strcat_char(os, (char)c); 375 break; 376 } 377 break; 378 case WC_JOHAB_HANGUL1: 379 if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ) 380 wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); 381 break; 382 case WC_JOHAB_HANJA1: 383 if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH) 384 wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); 385 break; 386 } 387 st->state = -1; 388 return os; 389 }