wtf.c (15114B)
1 2 #include "wc.h" 3 #include "wtf.h" 4 #include "sjis.h" 5 #include "big5.h" 6 #include "hkscs.h" 7 #include "johab.h" 8 #include "jis.h" 9 #include "viet.h" 10 #include "gbk.h" 11 #include "gb18030.h" 12 #include "uhc.h" 13 #ifdef USE_UNICODE 14 #include "ucs.h" 15 #include "utf8.h" 16 #endif 17 18 wc_uint8 WTF_WIDTH_MAP[ 0x100 ] = { 19 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 20 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 21 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 22 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 23 24 1,2,1,2,1,1,1,2, 1,2,1,2,1,1,1,1, 0,0,0,0,0,0,0,0, 0,0,0,0,1,1,1,1, 25 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 26 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 27 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 28 }; 29 30 wc_uint8 WTF_LEN_MAP[ 0x100 ] = { 31 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 32 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 33 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 34 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 35 36 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1, 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1, 37 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 38 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 39 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 40 }; 41 42 wc_uint8 WTF_TYPE_MAP[ 0x100 ] = { 43 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 44 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 45 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 46 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1, 47 48 2, 0xA,2, 0xA, 2, 0x12,2, 0xA, 2, 0xA,2, 0xA, 0x20,0x20,0x20,0x20, 49 4, 0xC,4, 0xC, 4, 0x20,4, 0xC, 4, 0xC,4, 0xC, 0x20,0x20,0x20,0x20, 50 0x20,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 51 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 52 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 53 }; 54 55 static wc_uint16 CCS_MAP[ 33 ] = { 56 WC_CCS_A_CS94 >> 8, WC_CCS_A_CS94W >> 8, 57 WC_CCS_A_CS96 >> 8, WC_CCS_A_CS96W >> 8, 58 WC_CCS_A_CS942 >> 8, WC_CCS_A_UNKNOWN >> 8, 59 WC_CCS_A_PCS >> 8, WC_CCS_A_PCSW >> 8, 60 WC_CCS_A_WCS16 >> 8, WC_CCS_A_WCS16W >> 8, 61 WC_CCS_A_WCS32 >> 8, WC_CCS_A_WCS32W >> 8, 62 0, 0, 63 0, 0, 64 WC_CCS_A_CS94_C >> 8, WC_CCS_A_CS94W_C >> 8, 65 WC_CCS_A_CS96_C >> 8, WC_CCS_A_CS96W_C >> 8, 66 WC_CCS_A_CS942_C >> 8, 0, 67 WC_CCS_A_PCS_C >> 8, WC_CCS_A_PCSW_C >> 8, 68 WC_CCS_A_WCS16_C >> 8, WC_CCS_A_WCS16W_C >> 8, 69 WC_CCS_A_WCS32_C >> 8, WC_CCS_A_WCS32W_C >> 8, 70 0, 0, 71 0, 0, 72 0, 73 }; 74 75 wc_ccs wtf_gr_ccs = 0; 76 static wc_ces wtf_major_ces = WC_CES_US_ASCII; 77 static wc_status wtf_major_st; 78 79 void 80 wtf_init(wc_ces ces1, wc_ces ces2) 81 { 82 int i; 83 wc_gset *gset; 84 85 if (wc_check_ces(ces2)) 86 wtf_major_ces = ces2; 87 88 if (! wc_check_ces(ces1)) 89 return; 90 gset = WcCesInfo[WC_CES_INDEX(ces1)].gset; 91 if (gset == NULL || gset[1].ccs == 0 || 92 gset[1].ccs & (WC_CCS_A_WCS16|WC_CCS_A_WCS32)) 93 return; 94 wtf_gr_ccs = gset[1].ccs; 95 96 if (WC_CCS_IS_WIDE(wtf_gr_ccs)) { 97 for (i = 0xa1; i <= 0xff; i++) { 98 WTF_WIDTH_MAP[i] = 2; 99 WTF_LEN_MAP[i] = 2; 100 WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1W; 101 } 102 } else { 103 for (i = 0xa1; i <= 0xff; i++) { 104 WTF_WIDTH_MAP[i] = 1; 105 WTF_LEN_MAP[i] = 1; 106 WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1; 107 } 108 } 109 } 110 111 /* 112 int 113 wtf_width(wc_uchar *p) 114 { 115 return (int)WTF_WIDTH_MAP[*p]; 116 } 117 */ 118 119 int 120 wtf_strwidth(wc_uchar *p) 121 { 122 int w = 0; 123 124 while (*p) { 125 w += wtf_width(p); 126 p += WTF_LEN_MAP[*p]; 127 } 128 return w; 129 } 130 131 /* 132 size_t 133 wtf_len1(wc_uchar *p) 134 { 135 return (size_t)WTF_LEN_MAP[*p]; 136 } 137 */ 138 139 size_t 140 wtf_len(wc_uchar *p) 141 { 142 wc_uchar *q = p; 143 144 q += WTF_LEN_MAP[*q]; 145 while (*q && ! WTF_WIDTH_MAP[*q]) 146 q += WTF_LEN_MAP[*q]; 147 return q - p; 148 } 149 150 /* 151 int 152 wtf_type(wc_uchar *p) 153 { 154 return (int)WTF_TYPE_MAP[*p]; 155 } 156 */ 157 158 #define wcs16_to_wtf(c, p) \ 159 ((p)[0] = (((c) >> 14) & 0x03) | 0x80), \ 160 ((p)[1] = (((c) >> 7) & 0x7f) | 0x80), \ 161 ((p)[2] = ( (c) & 0x7f) | 0x80) 162 #define wcs32_to_wtf(c, p) \ 163 ((p)[0] = (((c) >> 28) & 0x0f) | 0x80), \ 164 ((p)[1] = (((c) >> 21) & 0x7f) | 0x80), \ 165 ((p)[2] = (((c) >> 14) & 0x7f) | 0x80), \ 166 ((p)[3] = (((c) >> 7) & 0x7f) | 0x80), \ 167 ((p)[4] = ( (c) & 0x7f) | 0x80) 168 #define wtf_to_wcs16(p) \ 169 ((wc_uint32)((p)[0] & 0x03) << 14) \ 170 | ((wc_uint32)((p)[1] & 0x7f) << 7) \ 171 | ((wc_uint32)((p)[2] & 0x7f) ) 172 #define wtf_to_wcs32(p) \ 173 ((wc_uint32)((p)[0] & 0x0f) << 28) \ 174 | ((wc_uint32)((p)[1] & 0x7f) << 21) \ 175 | ((wc_uint32)((p)[2] & 0x7f) << 14) \ 176 | ((wc_uint32)((p)[3] & 0x7f) << 7) \ 177 | ((wc_uint32)((p)[4] & 0x7f) ) 178 179 void 180 wtf_push(Str os, wc_ccs ccs, wc_uint32 code) 181 { 182 wc_uchar s[8]; 183 wc_wchar_t cc, cc2; 184 size_t n; 185 186 if (ccs == WC_CCS_US_ASCII) { 187 Strcat_char(os, (char)(code & 0x7f)); 188 return; 189 } 190 cc.ccs = ccs; 191 cc.code = code; 192 if (WcOption.pre_conv && !(cc.ccs & WC_CCS_A_UNKNOWN)) { 193 if ((ccs == WC_CCS_JOHAB || ccs == WC_CCS_JOHAB_1 || 194 ccs == WC_CCS_JOHAB_2 || ccs == WC_CCS_JOHAB_3) && 195 (wtf_major_ces == WC_CES_EUC_KR || 196 wtf_major_ces == WC_CES_ISO_2022_KR)) { 197 cc2 = wc_johab_to_ksx1001(cc); 198 if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) 199 cc = cc2; 200 } else if (ccs == WC_CCS_KS_X_1001 && 201 wtf_major_ces == WC_CES_JOHAB) { 202 cc2 = wc_ksx1001_to_johab(cc); 203 if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) 204 cc = cc2; 205 } 206 #ifdef USE_UNICODE 207 else if (WcOption.ucs_conv) { 208 wc_bool fix_width_conv = WcOption.fix_width_conv; 209 WcOption.fix_width_conv = WC_FALSE; 210 wc_output_init(wtf_major_ces, &wtf_major_st); 211 if (! wc_ces_has_ccs(WC_CCS_SET(ccs), &wtf_major_st)) { 212 cc2 = wc_any_to_any_ces(cc, &wtf_major_st); 213 if (cc2.ccs == WC_CCS_US_ASCII) { 214 Strcat_char(os, (char)(cc2.code & 0x7f)); 215 return; 216 } 217 if (!WC_CCS_IS_UNKNOWN(cc2.ccs) && 218 cc2.ccs != WC_CCS_CP1258_2 && 219 cc2.ccs != WC_CCS_TCVN_5712_3) 220 cc = cc2; 221 } 222 WcOption.fix_width_conv = fix_width_conv; 223 } 224 #endif 225 } 226 227 switch (WC_CCS_TYPE(cc.ccs)) { 228 case WC_CCS_A_CS94: 229 if (cc.ccs == wtf_gr_ccs) { 230 s[0] = (cc.code & 0x7f) | 0x80; 231 n = 1; 232 break; 233 } 234 if (cc.ccs == WC_CCS_JIS_X_0201K && !WcOption.use_jisx0201k) { 235 cc2 = wc_jisx0201k_to_jisx0208(cc); 236 if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) { 237 wtf_push(os, cc2.ccs, cc2.code); 238 return; 239 } 240 } 241 s[0] = WTF_C_CS94; 242 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 243 s[2] = (cc.code & 0x7f) | 0x80; 244 n = 3; 245 break; 246 case WC_CCS_A_CS94W: 247 if (cc.ccs == wtf_gr_ccs) { 248 s[0] = ((cc.code >> 8) & 0x7f) | 0x80; 249 s[1] = ( cc.code & 0x7f) | 0x80; 250 n = 2; 251 break; 252 } 253 s[0] = WTF_C_CS94W; 254 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 255 s[2] = ((cc.code >> 8) & 0x7f) | 0x80; 256 s[3] = ( cc.code & 0x7f) | 0x80; 257 n = 4; 258 break; 259 case WC_CCS_A_CS96: 260 if (WcOption.use_combining && wc_is_combining(cc)) 261 s[0] = WTF_C_CS96_C; 262 else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) { 263 s[0] = (cc.code & 0x7f) | 0x80; 264 n = 1; 265 break; 266 } else 267 s[0] = WTF_C_CS96; 268 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 269 s[2] = (cc.code & 0x7f) | 0x80; 270 n = 3; 271 break; 272 case WC_CCS_A_CS96W: 273 if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) { 274 s[0] = ((cc.code >> 8) & 0x7f) | 0x80; 275 s[1] = ( cc.code & 0x7f) | 0x80; 276 n = 2; 277 break; 278 } 279 s[0] = WTF_C_CS96W; 280 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 281 s[2] = ((cc.code >> 8) & 0x7f) | 0x80; 282 s[3] = ( cc.code & 0x7f) | 0x80; 283 n = 4; 284 break; 285 case WC_CCS_A_CS942: 286 if (cc.ccs == wtf_gr_ccs) { 287 s[0] = (cc.code & 0x7f) | 0x80; 288 n = 1; 289 break; 290 } 291 s[0] = WTF_C_CS942; 292 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 293 s[2] = (cc.code & 0x7f) | 0x80; 294 n = 3; 295 break; 296 case WC_CCS_A_PCS: 297 if (WcOption.use_combining && wc_is_combining(cc)) 298 s[0] = WTF_C_PCS_C; 299 else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) { 300 s[0] = (cc.code & 0x7f) | 0x80; 301 n = 1; 302 break; 303 } else 304 s[0] = WTF_C_PCS; 305 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 306 s[2] = (cc.code & 0x7f) | 0x80; 307 n = 3; 308 break; 309 case WC_CCS_A_PCSW: 310 switch (cc.ccs) { 311 case WC_CCS_SJIS_EXT: 312 cc = wc_sjis_ext_to_cs94w(cc); 313 break; 314 case WC_CCS_GBK: 315 cc = wc_gbk_to_cs128w(cc); 316 break; 317 case WC_CCS_GBK_EXT: 318 cc = wc_gbk_ext_to_cs128w(cc); 319 break; 320 case WC_CCS_BIG5: 321 cc = wc_big5_to_cs94w(cc); 322 break; 323 case WC_CCS_HKSCS: 324 cc = wc_hkscs_to_cs128w(cc); 325 break; 326 case WC_CCS_JOHAB: 327 cc = wc_johab_to_cs128w(cc); 328 break; 329 case WC_CCS_UHC: 330 cc = wc_uhc_to_cs128w(cc); 331 break; 332 } 333 if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) { 334 s[0] = ((cc.code >> 8) & 0x7f) | 0x80; 335 s[1] = ( cc.code & 0x7f) | 0x80; 336 n = 2; 337 break; 338 } 339 s[0] = WTF_C_PCSW; 340 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 341 s[2] = ((cc.code >> 8) & 0x7f) | 0x80; 342 s[3] = ( cc.code & 0x7f) | 0x80; 343 n = 4; 344 break; 345 case WC_CCS_A_WCS16: 346 s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS16W : WTF_C_WCS16) 347 | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0); 348 wcs16_to_wtf(cc.code, s + 1); 349 s[1] |= (WC_CCS_INDEX(cc.ccs) << 2); 350 n = 4; 351 break; 352 case WC_CCS_A_WCS32: 353 s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS32W : WTF_C_WCS32) 354 | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0); 355 wcs32_to_wtf(cc.code, s + 1); 356 s[1] |= (WC_CCS_INDEX(cc.ccs) << 4); 357 n = 6; 358 break; 359 default: 360 s[0] = WTF_C_UNKNOWN; 361 s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; 362 s[2] = (cc.code & 0x7f) | 0x80; 363 n = 3; 364 break; 365 } 366 Strcat_charp_n(os, (char *)s, n); 367 } 368 369 void 370 wtf_push_unknown(Str os, wc_uchar *p, size_t len) 371 { 372 for (; len--; p++) { 373 if (*p & 0x80) 374 wtf_push(os, WC_CCS_UNKNOWN, *p); 375 else 376 Strcat_char(os, (char)*p); 377 } 378 } 379 380 wc_wchar_t 381 wtf_parse1(wc_uchar **p) 382 { 383 wc_uchar *q = *p; 384 wc_wchar_t cc; 385 386 if (*q < 0x80) { 387 cc.ccs = WC_CCS_US_ASCII; 388 cc.code = *(q++); 389 } else if (*q > 0xa0) { 390 cc.ccs = wtf_gr_ccs; 391 if (WC_CCS_IS_WIDE(cc.ccs)) { 392 cc.code = ((wc_uint32)*q << 8) | *(q+1); 393 q += 2; 394 } else 395 cc.code = *(q++); 396 } else { 397 cc.ccs = (wc_uint32)CCS_MAP[*(q++) - 0x80] << 8; 398 switch (WC_CCS_TYPE(cc.ccs)) { 399 case WC_CCS_A_CS94: 400 case WC_CCS_A_CS96: 401 case WC_CCS_A_CS942: 402 case WC_CCS_A_PCS: 403 case WC_CCS_A_UNKNOWN: 404 cc.ccs |= *(q++) & 0x7f; 405 cc.code = *(q++); 406 break; 407 case WC_CCS_A_CS94W: 408 case WC_CCS_A_CS96W: 409 case WC_CCS_A_PCSW: 410 cc.ccs |= *(q++) & 0x7f; 411 cc.code = ((wc_uint32)*q << 8) | *(q+1); 412 q += 2; 413 break; 414 case WC_CCS_A_WCS16: 415 case WC_CCS_A_WCS16W: 416 cc.ccs |= (*q & 0x7c) >> 2; 417 cc.code = wtf_to_wcs16(q); 418 q += 3; 419 break; 420 case WC_CCS_A_WCS32: 421 case WC_CCS_A_WCS32W: 422 cc.ccs |= (*q & 0x70) >> 4; 423 cc.code = wtf_to_wcs32(q); 424 q += 5; 425 break; 426 default: 427 /* case 0: */ 428 cc.ccs = WC_CCS_US_ASCII; 429 cc.code = (wc_uint32)' '; 430 break; 431 } 432 } 433 434 *p = q; 435 switch (cc.ccs) { 436 case WC_CCS_SJIS_EXT_1: 437 case WC_CCS_SJIS_EXT_2: 438 return wc_cs94w_to_sjis_ext(cc); 439 case WC_CCS_GBK_1: 440 case WC_CCS_GBK_2: 441 return wc_cs128w_to_gbk(cc); 442 case WC_CCS_GBK_EXT_1: 443 case WC_CCS_GBK_EXT_2: 444 return wc_cs128w_to_gbk_ext(cc); 445 case WC_CCS_BIG5_1: 446 case WC_CCS_BIG5_2: 447 return wc_cs94w_to_big5(cc); 448 case WC_CCS_HKSCS_1: 449 case WC_CCS_HKSCS_2: 450 return wc_cs128w_to_hkscs(cc); 451 case WC_CCS_JOHAB_1: 452 case WC_CCS_JOHAB_2: 453 case WC_CCS_JOHAB_3: 454 return wc_cs128w_to_johab(cc); 455 case WC_CCS_UHC_1: 456 case WC_CCS_UHC_2: 457 return wc_cs128w_to_uhc(cc); 458 } 459 return cc; 460 } 461 462 wc_wchar_t 463 wtf_parse(wc_uchar **p) 464 { 465 wc_uchar *q; 466 wc_wchar_t cc, cc2; 467 wc_uint32 ucs, ucs2; 468 469 if (**p < 0x80) { 470 cc.ccs = WC_CCS_US_ASCII; 471 cc.code = *((*p)++); 472 } else 473 cc = wtf_parse1(p); 474 if ((! WcOption.use_combining) || WTF_WIDTH_MAP[**p]) 475 return cc; 476 477 q = *p; 478 cc2 = wtf_parse1(&q); 479 if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_CP1258_1) && 480 WC_CCS_SET(cc2.ccs) == WC_CCS_CP1258_1) { 481 cc2.code = wc_cp1258_precompose(cc.code, cc2.code); 482 if (cc2.code) { 483 cc2.ccs = WC_CCS_CP1258_2; 484 *p = q; 485 return cc2; 486 } 487 } else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_TCVN_5712_1) && 488 WC_CCS_SET(cc2.ccs) == WC_CCS_TCVN_5712_1) { 489 cc2.code = wc_tcvn5712_precompose(cc.code, cc2.code); 490 if (cc2.code) { 491 cc2.ccs = WC_CCS_TCVN_5712_3; 492 *p = q; 493 return cc2; 494 } 495 } 496 #ifdef USE_UNICODE 497 else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_ISO_8859_1 || 498 WC_CCS_IS_UNICODE(cc.ccs)) && WC_CCS_IS_UNICODE(cc2.ccs)) { 499 while (1) { 500 ucs = (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG) 501 ? wc_ucs_tag_to_ucs(cc.code) : cc.code; 502 ucs2 = (WC_CCS_SET(cc2.ccs) == WC_CCS_UCS_TAG) 503 ? wc_ucs_tag_to_ucs(cc2.code) : cc2.code; 504 ucs = wc_ucs_precompose(ucs, ucs2); 505 if (ucs == WC_C_UCS4_ERROR) 506 break; 507 if (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG) 508 cc.code = wc_ucs_to_ucs_tag(ucs, wc_ucs_tag_to_tag(cc.code)); 509 else { 510 cc.ccs = wc_ucs_to_ccs(ucs); 511 cc.code = ucs; 512 } 513 *p = q; 514 if (! WTF_WIDTH_MAP[*q]) 515 break; 516 cc2 = wtf_parse1(&q); 517 if (! WC_CCS_IS_UNICODE(cc2.ccs)) 518 break; 519 } 520 } 521 #endif 522 return cc; 523 } 524 525 wc_ccs 526 wtf_get_ccs(wc_uchar *p) 527 { 528 return wtf_parse1(&p).ccs; 529 } 530 531 wc_uint32 532 wtf_get_code(wc_uchar *p) 533 { 534 return wtf_parse1(&p).code; 535 } 536 537 wc_bool 538 wtf_is_hangul(wc_uchar *p) 539 { 540 if (*p > 0xa0) 541 return (wtf_gr_ccs == WC_CCS_KS_X_1001 || wtf_gr_ccs == WC_CCS_JOHAB_1); 542 else if (*p == WTF_C_CS94W) 543 return ((*(p + 1) & 0x7f) == WC_F_KS_X_1001); 544 else if (*p == WTF_C_PCSW) { 545 wc_uchar f = *(p + 1) & 0x7f; 546 return (f == WC_F_JOHAB_1 || f == WC_F_JOHAB_2 || f == WC_F_JOHAB_3 || 547 f == WC_F_UHC_1 || f == WC_F_UHC_2); 548 } 549 #ifdef USE_UNICODE 550 else if (*p == WTF_C_WCS16W) { 551 wc_uchar f = (*(++p) & 0x7f) >> 2; 552 if (f == WC_F_UCS2) 553 return wc_is_ucs_hangul(wtf_to_wcs16(p)); 554 } else if (*p == WTF_C_WCS32W) { 555 wc_uchar f = (*(++p) & 0x7f) >> 4; 556 if (f == WC_F_UCS_TAG) 557 return wc_is_ucs_hangul(wc_ucs_tag_to_ucs(wtf_to_wcs32(p))); 558 } 559 #endif 560 return WC_FALSE; 561 } 562 563 char * 564 wtf_conv_fit(char *s, wc_ces ces) 565 { 566 wc_uchar *p; 567 Str os; 568 wc_wchar_t cc; 569 wc_ces major_ces; 570 wc_bool pre_conv, ucs_conv; 571 572 if (ces == WC_CES_WTF || ces == WC_CES_US_ASCII) 573 return s; 574 575 for (p = (wc_uchar *)s; *p && *p < 0x80; p++) 576 ; 577 if (! *p) 578 return s; 579 580 os = Strnew_size(strlen(s)); 581 if (p > (wc_uchar *)s) 582 Strcopy_charp_n(os, s, (int)(p - (wc_uchar *)s)); 583 584 major_ces = wtf_major_ces; 585 pre_conv = WcOption.pre_conv; 586 ucs_conv = WcOption.ucs_conv; 587 wtf_major_ces = ces; 588 WcOption.pre_conv = WC_TRUE; 589 WcOption.ucs_conv = WC_TRUE; 590 while (*p) { 591 cc = wtf_parse1(&p); 592 wtf_push(os, cc.ccs, cc.code); 593 } 594 wtf_major_ces = major_ces; 595 WcOption.pre_conv = pre_conv; 596 WcOption.ucs_conv = ucs_conv; 597 return os->ptr; 598 }