w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

ucs.c (17840B)


      1 
      2 #ifdef USE_UNICODE
      3 
      4 #include <stdlib.h>
      5 #include "wc.h"
      6 #include "ucs.h"
      7 #include "search.h"
      8 #include "big5.h"
      9 #include "hkscs.h"
     10 #include "sjis.h"
     11 #include "johab.h"
     12 #include "gbk.h"
     13 #include "gb18030.h"
     14 #include "uhc.h"
     15 #include "viet.h"
     16 #include "wtf.h"
     17 
     18 #include "ucs.map"
     19 
     20 #include "map/ucs_ambwidth.map"
     21 #include "map/ucs_wide.map"
     22 #include "map/ucs_combining.map"
     23 #include "map/ucs_precompose.map"
     24 #include "map/ucs_hangul.map"
     25 #include "map/ucs_fullwidth.map"
     26 #include "map/ucs_isalpha.map"
     27 #include "map/ucs_isdigit.map"
     28 #include "map/ucs_islower.map"
     29 #include "map/ucs_isupper.map"
     30 #include "map/ucs_case.map"
     31 
     32 #define MAX_TAG_MAP 0x100
     33 static int n_tag_map = 0;
     34 static char *tag_map[ MAX_TAG_MAP ];
     35 
     36 wc_table *
     37 wc_get_ucs_table(wc_ccs ccs)
     38 {
     39     int f = WC_CCS_INDEX(ccs);
     40 
     41     switch (WC_CCS_TYPE(ccs)) {
     42     case WC_CCS_A_CS94:
     43 	if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
     44 	    return NULL;
     45 	return &ucs_cs94_table[f - WC_F_ISO_BASE];
     46     case WC_CCS_A_CS94W:
     47 	if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
     48 	    return NULL;
     49 	return &ucs_cs94w_table[f - WC_F_ISO_BASE];
     50     case WC_CCS_A_CS96:
     51 	if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
     52 	    return NULL;
     53 	return &ucs_cs96_table[f - WC_F_ISO_BASE];
     54     case WC_CCS_A_CS96W:
     55 	if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
     56 	    return NULL;
     57 	return &ucs_cs96w_table[f - WC_F_ISO_BASE];
     58     case WC_CCS_A_CS942:
     59 	if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
     60 	    return NULL;
     61 	return &ucs_cs942_table[f - WC_F_ISO_BASE];
     62     case WC_CCS_A_PCS:
     63 	if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
     64 	    return NULL;
     65 	return &ucs_pcs_table[f - WC_F_PCS_BASE];
     66     case WC_CCS_A_PCSW:
     67 	if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
     68 	    return NULL;
     69 	return &ucs_pcsw_table[f - WC_F_PCS_BASE];
     70     default:
     71 	return NULL;
     72     }
     73 }
     74 
     75 wc_wchar_t
     76 wc_ucs_to_any(wc_uint32 ucs, wc_table *t)
     77 {
     78     wc_wchar_t cc;
     79     wc_map *map;
     80 
     81     if (t && t->map && ucs && ucs <= WC_C_UCS2_END) {
     82 	map = wc_map_search((wc_uint16)ucs, t->map, t->n);
     83 	if (map)
     84 	    return t->conv(t->ccs, map->code2);
     85     }
     86     if (t && (ucs & ~0xFFFF) == WC_C_UCS4_PLANE2) {
     87 	if (t->ccs == WC_CCS_JIS_X_0213_1)
     88 	    map = wc_map_search((wc_uint16)(ucs & 0xffff),
     89 		ucs_p2_jisx02131_map, N_ucs_p2_jisx02131_map);
     90 	else if (t->ccs == WC_CCS_JIS_X_0213_2)
     91 	    map = wc_map_search((wc_uint16)(ucs & 0xffff),
     92 		ucs_p2_jisx02132_map, N_ucs_p2_jisx02132_map);
     93 	else if (t->ccs == WC_CCS_HKSCS ||
     94 		 t->ccs == WC_CCS_HKSCS_1 || t->ccs == WC_CCS_HKSCS_2)
     95 	    map = wc_map_search((wc_uint16)(ucs & 0xffff),
     96 		ucs_p2_hkscs_map, N_ucs_p2_hkscs_map);
     97 	else
     98 	    map = NULL;
     99 	if (map)
    100 	    return t->conv(t->ccs, map->code2);
    101     }
    102     cc.ccs = WC_CCS_UNKNOWN;
    103     return cc;
    104 }
    105 
    106 wc_uint32
    107 wc_any_to_ucs(wc_wchar_t cc)
    108 {
    109     int f;
    110     wc_uint16 *map = NULL;
    111     wc_map *map2;
    112 
    113     f = WC_CCS_INDEX(cc.ccs);
    114     switch (WC_CCS_TYPE(cc.ccs)) {
    115     case WC_CCS_A_CS94:
    116 	if (cc.ccs == WC_CCS_US_ASCII)
    117 	    return cc.code;
    118 	if (f < WC_F_ISO_BASE || f > WC_F_CS94_END)
    119 	    return WC_C_UCS4_ERROR;
    120 	map = cs94_ucs_map[f - WC_F_ISO_BASE];
    121 	cc.code &= 0x7f;
    122 	break;
    123     case WC_CCS_A_CS94W:
    124 	if (cc.ccs == WC_CCS_GB_2312 && WcOption.use_gb12345_map) {
    125 	    cc.ccs = WC_CCS_GB_12345;
    126 	    return wc_any_to_ucs(cc);
    127 	} else if (cc.ccs == WC_CCS_JIS_X_0213_1) {
    128 	    map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
    129 		jisx02131_ucs_p2_map, N_jisx02131_ucs_p2_map);
    130 	    if (map2)
    131 		return map2->code2 | WC_C_UCS4_PLANE2;
    132 	} else if (cc.ccs == WC_CCS_JIS_X_0213_2) {
    133 	    map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f),
    134 		jisx02132_ucs_p2_map, N_jisx02132_ucs_p2_map);
    135 	    if (map2)
    136 		return map2->code2 | WC_C_UCS4_PLANE2;
    137 	}
    138 	if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END)
    139 	    return 0;
    140 	map = cs94w_ucs_map[f - WC_F_ISO_BASE];
    141 	cc.code = WC_CS94W_N(cc.code);
    142 	break;
    143     case WC_CCS_A_CS96:
    144 	if (f < WC_F_ISO_BASE || f > WC_F_CS96_END)
    145 	    return WC_C_UCS4_ERROR;
    146 	map = cs96_ucs_map[f - WC_F_ISO_BASE];
    147 	cc.code &= 0x7f;
    148 	break;
    149     case WC_CCS_A_CS96W:
    150 	if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END)
    151 	    return WC_C_UCS4_ERROR;
    152 	map = cs96w_ucs_map[f - WC_F_ISO_BASE];
    153 	cc.code = WC_CS96W_N(cc.code);
    154 	break;
    155     case WC_CCS_A_CS942:
    156 	if (f < WC_F_ISO_BASE || f > WC_F_CS942_END)
    157 	    return WC_C_UCS4_ERROR;
    158 	map = cs942_ucs_map[f - WC_F_ISO_BASE];
    159 	cc.code &= 0x7f;
    160 	break;
    161     case WC_CCS_A_PCS:
    162 	if (f < WC_F_PCS_BASE || f > WC_F_PCS_END)
    163 	    return WC_C_UCS4_ERROR;
    164 	switch (cc.ccs) {
    165 	case WC_CCS_CP1258_2:
    166 	    map2 = wc_map_search((wc_uint16)cc.code,
    167 		cp12582_ucs_map, N_cp12582_ucs_map);
    168 	    if (map2)
    169 		return map2->code2;
    170 	    return WC_C_UCS4_ERROR;
    171 	case WC_CCS_TCVN_5712_3:
    172 	    return wc_any_to_ucs(wc_tcvn57123_to_tcvn5712(cc));
    173 	case WC_CCS_GBK_80:
    174 	    return WC_C_UCS2_EURO;
    175 	}
    176 	map = pcs_ucs_map[f - WC_F_PCS_BASE];
    177 	cc.code &= 0x7f;
    178 	break;
    179     case WC_CCS_A_PCSW:
    180 	if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END)
    181 	    return WC_C_UCS4_ERROR;
    182 	map = pcsw_ucs_map[f - WC_F_PCS_BASE];
    183 	switch (cc.ccs) {
    184 	case WC_CCS_BIG5:
    185 	    cc.code = WC_BIG5_N(cc.code);
    186 	    break;
    187 	case WC_CCS_BIG5_2:
    188 	    cc.code = WC_CS94W_N(cc.code) + WC_C_BIG5_2_BASE;
    189 	    break;
    190 	case WC_CCS_HKSCS_1:
    191 	case WC_CCS_HKSCS_2:
    192 	    cc = wc_cs128w_to_hkscs(cc);
    193 	case WC_CCS_HKSCS:
    194 	    map2 = wc_map_search((wc_uint16)cc.code,
    195 		hkscs_ucs_p2_map, N_hkscs_ucs_p2_map);
    196 	    if (map2)
    197 		return map2->code2 | WC_C_UCS4_PLANE2;
    198 	    cc.code = wc_hkscs_to_N(cc.code);
    199 	    break;
    200 	case WC_CCS_JOHAB:
    201 	    return wc_any_to_ucs(wc_johab_to_cs128w(cc));
    202 	case WC_CCS_JOHAB_1:
    203 	    return WC_CS94x128_N(cc.code) + WC_C_UCS2_HANGUL;
    204 	case WC_CCS_JOHAB_2:
    205 	    cc.code = WC_CS128W_N(cc.code);
    206 	    cc.code = WC_N_JOHAB2(cc.code);
    207 	    map2 = wc_map_search((wc_uint16)cc.code,
    208 		johab2_ucs_map, N_johab2_ucs_map);
    209 	    if (map2)
    210 		return map2->code2;
    211 	    return WC_C_UCS4_ERROR;
    212 	case WC_CCS_JOHAB_3:
    213 	    if ((cc.code & 0x7f7f) < 0x2121)
    214 		return WC_C_UCS4_ERROR;
    215 	case WC_CCS_SJIS_EXT:
    216 	    return wc_any_to_ucs(wc_sjis_ext_to_cs94w(cc));
    217 	case WC_CCS_SJIS_EXT_1:
    218 	    cc.code = wc_sjis_ext1_to_N(cc.code);
    219 	    if (cc.code == WC_C_SJIS_ERROR)
    220 		return WC_C_UCS4_ERROR;
    221 	    break;
    222 	case WC_CCS_SJIS_EXT_2:
    223 	    cc.code = wc_sjis_ext2_to_N(cc.code);
    224 	    if (cc.code == WC_C_SJIS_ERROR)
    225 		return WC_C_UCS4_ERROR;
    226 	    break;
    227 	case WC_CCS_GBK_1:
    228 	case WC_CCS_GBK_2:
    229 	    cc = wc_cs128w_to_gbk(cc);
    230 	case WC_CCS_GBK:
    231 	    cc.code = wc_gbk_to_N(cc.code);
    232 	    break;
    233 	case WC_CCS_GBK_EXT:
    234 	case WC_CCS_GBK_EXT_1:
    235 	case WC_CCS_GBK_EXT_2:
    236 	    return wc_gb18030_to_ucs(cc);
    237 	case WC_CCS_UHC_1:
    238 	case WC_CCS_UHC_2:
    239 	    cc = wc_cs128w_to_uhc(cc);
    240 	case WC_CCS_UHC:
    241 	    if (cc.code > WC_C_UHC_END)
    242 		return WC_C_UCS4_ERROR;
    243 	    cc.code = wc_uhc_to_N(cc.code);
    244 	    break;
    245 	default:
    246 	    cc.code = WC_CS94W_N(cc.code);
    247 	    break;
    248 	}
    249 	break;
    250     case WC_CCS_A_WCS16:
    251 	switch (WC_CCS_SET(cc.ccs)) {
    252 	case WC_CCS_UCS2:
    253 	    return cc.code;
    254 	}
    255 	return WC_C_UCS4_ERROR;
    256     case WC_CCS_A_WCS32:
    257 	switch (WC_CCS_SET(cc.ccs)) {
    258 	case WC_CCS_UCS4:
    259 	    return cc.code;
    260 	case WC_CCS_UCS_TAG:
    261 	    return wc_ucs_tag_to_ucs(cc.code);
    262 	case WC_CCS_GB18030:
    263 	    return wc_gb18030_to_ucs(cc);
    264 	}
    265 	return WC_C_UCS4_ERROR;
    266     case WC_CCS_A_UNKNOWN:
    267 	if (cc.ccs == WC_CCS_C1)
    268 	    return (cc.code | 0x80);
    269     default:
    270 	return WC_C_UCS4_ERROR;
    271     }
    272     if (map == NULL)
    273 	return WC_C_UCS4_ERROR;
    274     cc.code = map[cc.code];
    275     return cc.code ? cc.code : WC_C_UCS4_ERROR;
    276 }
    277 
    278 wc_wchar_t
    279 wc_any_to_any(wc_wchar_t cc, wc_table *t)
    280 {
    281     wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
    282     wc_uint32 ucs = wc_any_to_ucs(cc);
    283 
    284     if (ucs != WC_C_UCS4_ERROR) {
    285 	cc = wc_ucs_to_any(ucs, t);
    286 	if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    287 	    return cc;
    288 
    289 	ucs = wc_ucs_to_fullwidth(ucs);
    290 	if (ucs != WC_C_UCS4_ERROR) {
    291 	    cc = wc_ucs_to_any(ucs, t);
    292 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    293 		return cc;
    294 	}
    295     }
    296     cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    297     return cc;
    298 }
    299 
    300 wc_wchar_t
    301 wc_ucs_to_any_list(wc_uint32 ucs, wc_table **tlist)
    302 {
    303     wc_wchar_t cc;
    304     wc_table **t;
    305 
    306     if (tlist != NULL) {
    307 	for (t = tlist; *t != NULL; t++) {
    308 	    if ((*t)->map == NULL)
    309 		continue;
    310 	    cc = wc_ucs_to_any(ucs, *t);
    311 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    312 		return cc;
    313 	}
    314     }
    315     cc.ccs = WC_CCS_UNKNOWN;
    316     return cc;
    317 }
    318 
    319 wc_wchar_t
    320 wc_any_to_any_ces(wc_wchar_t cc, wc_status *st)
    321 {
    322     wc_uint32 ucs = wc_any_to_ucs(cc);
    323     wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
    324 
    325     if (ucs < 0x80) {
    326 	cc.ccs = WC_CCS_US_ASCII;
    327 	cc.code = ucs;
    328 	return cc;
    329     }
    330     if (ucs != WC_C_UCS4_ERROR) {
    331 	if (st->ces_info->id & WC_CES_T_UTF) {
    332 	    cc.ccs = wc_ucs_to_ccs(ucs);
    333 	    cc.code = ucs;
    334 	    return cc;
    335 	} else if (st->ces_info->id == WC_CES_JOHAB) {
    336 	    cc = wc_ucs_to_johab(ucs);
    337 	    if (WC_CCS_IS_UNKNOWN(cc.ccs))
    338 		cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    339 	    return cc;
    340 	}
    341 	cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
    342 	if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    343 	    return cc;
    344 	if (! WcOption.fix_width_conv) {
    345 	    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
    346 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    347 		return cc;
    348 	}
    349 	if (st->ces_info->id == WC_CES_GB18030) {
    350 	    cc = wc_ucs_to_gb18030(ucs);
    351 	    if (WC_CCS_IS_UNKNOWN(cc.ccs))
    352 		cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    353 	    return cc;
    354 	}
    355 	if (ucs == WC_C_UCS2_NBSP) {	/* NBSP -> SP */
    356 	    cc.ccs = WC_CCS_US_ASCII;
    357 	    cc.code = 0x20;
    358 	    return cc;
    359 	}
    360 	if (st->ces_info->id & (WC_CES_T_ISO_8859|WC_CES_T_EUC) &&
    361 	    0x80 <= ucs && ucs <= 0x9F) {
    362 	    cc.ccs = WC_CCS_C1;
    363 	    cc.code = ucs;
    364 	    return cc;
    365 	}
    366 
    367 	ucs = wc_ucs_to_fullwidth(ucs);
    368 	if (ucs != WC_C_UCS4_ERROR) {
    369 	    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
    370 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    371 		return cc;
    372 	    if (! WcOption.fix_width_conv) {
    373 		cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
    374 		if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    375 		    return cc;
    376 	    }
    377 	}
    378     }
    379     cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    380     return cc;
    381 }
    382 
    383 wc_wchar_t
    384 wc_any_to_iso2022(wc_wchar_t cc, wc_status *st)
    385 {
    386     wc_uint32 ucs = wc_any_to_ucs(cc);
    387     wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs);
    388 
    389     if (ucs < 0x80) {
    390 	cc.ccs = WC_CCS_US_ASCII;
    391 	cc.code = ucs;
    392 	return cc;
    393     }
    394     if (ucs != WC_C_UCS4_ERROR) {
    395 	cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
    396 	if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    397 	    return cc;
    398 	if (! WcOption.strict_iso2022) {
    399 	    cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
    400 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    401 		return cc;
    402 	}
    403 	if (! WcOption.fix_width_conv) {
    404 	    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
    405 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    406 		return cc;
    407 	    if (! WcOption.strict_iso2022) {
    408 		cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
    409 		if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    410 		    return cc;
    411 	    }
    412 	}
    413 	if (ucs == WC_C_UCS2_NBSP) {	/* NBSP -> SP */
    414 	   cc.ccs = WC_CCS_US_ASCII;
    415 	   cc.code = 0x20;
    416 	   return cc;
    417 	}
    418 
    419 	ucs = wc_ucs_to_fullwidth(ucs);
    420 	if (ucs != WC_C_UCS4_ERROR) {
    421 	    cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist);
    422 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    423 		return cc;
    424 	    if (! WcOption.strict_iso2022) {
    425 		cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs);
    426 		if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    427 		    return cc;
    428 	    }
    429 	    if (! WcOption.fix_width_conv) {
    430 		cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw);
    431 		if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    432 		    return cc;
    433 		if (! WcOption.strict_iso2022) {
    434 		    cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs);
    435 		    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    436 			return cc;
    437 		}
    438 	    }
    439 	}
    440 	if (ucs == WC_C_UCS2_NBSP) {	/* NBSP -> SP */
    441 	   cc.ccs = WC_CCS_US_ASCII;
    442 	   cc.code = 0x20;
    443 	   return cc;
    444 	}
    445     }
    446     cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    447     return cc;
    448 }
    449 
    450 wc_wchar_t
    451 wc_ucs_to_iso2022(wc_uint32 ucs)
    452 {
    453     wc_table *t;
    454     wc_wchar_t cc;
    455     int f;
    456 
    457     if (ucs <= WC_C_UCS2_END) {
    458 	for (f = 0; f <= WC_F_CS96_END - WC_F_ISO_BASE; f++) {
    459 	    t = &ucs_cs96_table[f];
    460 	    if (t->map == NULL)
    461 		continue;
    462 	    cc = wc_ucs_to_any((wc_uint16)ucs, t);
    463 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    464 		return cc;
    465 	}
    466 	for (f = 0; f <= WC_F_CS94_END - WC_F_ISO_BASE; f++) {
    467 	    t = &ucs_cs94_table[f];
    468 	    if (t->map == NULL)
    469 		continue;
    470 	    cc = wc_ucs_to_any((wc_uint16)ucs, t);
    471 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    472 		return cc;
    473 	}
    474 	for (f = 0; f <= WC_F_CS942_END - WC_F_ISO_BASE; f++) {
    475 	    t = &ucs_cs942_table[f];
    476 	    if (t->map == NULL)
    477 		continue;
    478 	    cc = wc_ucs_to_any((wc_uint16)ucs, t);
    479 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    480 		return cc;
    481 	}
    482     }
    483     cc.ccs = WC_CCS_UNKNOWN;
    484     return cc;
    485 }
    486 
    487 wc_wchar_t
    488 wc_ucs_to_iso2022w(wc_uint32 ucs)
    489 {
    490     wc_table *t;
    491     wc_wchar_t cc;
    492     int f;
    493 
    494     if (ucs <= WC_C_UCS2_END) {
    495 	for (f = 0; f <= WC_F_CS94W_END - WC_F_ISO_BASE; f++) {
    496 	    t = &ucs_cs94w_table[f];
    497 	    if (t->map == NULL)
    498 		continue;
    499 	    cc = wc_ucs_to_any((wc_uint16)ucs, t);
    500 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    501 		return cc;
    502 	}
    503 	for (f = 0; f <= WC_F_CS96W_END - WC_F_ISO_BASE; f++) {
    504 	    t = &ucs_cs96w_table[f];
    505 	    if (t->map == NULL)
    506 		continue;
    507 	    cc = wc_ucs_to_any((wc_uint16)ucs, t);
    508 	    if (!WC_CCS_IS_UNKNOWN(cc.ccs))
    509 		return cc;
    510 	}
    511     }
    512     cc.ccs = WC_CCS_UNKNOWN_W;
    513     return cc;
    514 }
    515 
    516 wc_ccs
    517 wc_ucs_to_ccs(wc_uint32 ucs)
    518 {
    519     if (0x80 <= ucs && ucs <= 0x9F)
    520 	return WC_CCS_C1;
    521     return ((ucs <= WC_C_UCS2_END) ? WC_CCS_UCS2 : WC_CCS_UCS4)
    522 	| ((WcOption.east_asian_width && wc_is_ucs_ambiguous_width(ucs))
    523 		    ? WC_CCS_A_WIDE : 0)
    524 	| (wc_is_ucs_wide(ucs) ? WC_CCS_A_WIDE : 0)
    525 	| (wc_is_ucs_combining(ucs) ? WC_CCS_A_COMB : 0);
    526 }
    527 
    528 wc_bool
    529 wc_is_ucs_ambiguous_width(wc_uint32 ucs)
    530 {
    531     if (0xa1 <= ucs && ucs <= 0xfe && WcOption.use_jisx0213)
    532 	return 1;
    533     else if (ucs <= WC_C_UCS2_END)
    534 	return (wc_map_range_search((wc_uint16)ucs,
    535 		    ucs_ambwidth_map, N_ucs_ambwidth_map) != NULL);
    536     else
    537 	return ((0xF0000 <= ucs && ucs <= 0xFFFFD)
    538 		|| (0x100000 <= ucs && ucs <= 0x10FFFD));
    539 }
    540 
    541 wc_bool
    542 wc_is_ucs_wide(wc_uint32 ucs)
    543 {
    544     if (ucs <= WC_C_UCS2_END)
    545 	return (wc_map_range_search((wc_uint16)ucs,
    546 		ucs_wide_map, N_ucs_wide_map) != NULL);
    547     else
    548 	return ((ucs & ~0xFFFF) == WC_C_UCS4_PLANE2 ||
    549 		(ucs & ~0xFFFF) == WC_C_UCS4_PLANE3);
    550 }
    551 
    552 wc_bool
    553 wc_is_ucs_combining(wc_uint32 ucs)
    554 {
    555     return (WcOption.use_combining && ucs <= WC_C_UCS2_END &&
    556 	wc_map_range_search((wc_uint16)ucs,
    557 	ucs_combining_map, N_ucs_combining_map) != NULL);
    558 }
    559 
    560 wc_bool
    561 wc_is_ucs_hangul(wc_uint32 ucs)
    562 {
    563     return (ucs <= WC_C_UCS2_END &&
    564 	wc_map_range_search((wc_uint16)ucs,
    565 	ucs_hangul_map, N_ucs_hangul_map) != NULL);
    566 }
    567 
    568 wc_bool
    569 wc_is_ucs_alpha(wc_uint32 ucs)
    570 {
    571     return (ucs <= WC_C_UCS2_END &&
    572 	wc_map_range_search((wc_uint16)ucs,
    573 	ucs_isalpha_map, N_ucs_isalpha_map) != NULL);
    574 }
    575 
    576 wc_bool
    577 wc_is_ucs_digit(wc_uint32 ucs)
    578 {
    579     return (ucs <= WC_C_UCS2_END &&
    580 	wc_map_range_search((wc_uint16)ucs,
    581 	ucs_isdigit_map, N_ucs_isdigit_map) != NULL);
    582 }
    583 
    584 wc_bool
    585 wc_is_ucs_alnum(wc_uint32 ucs)
    586 {
    587     return (wc_is_ucs_alpha(ucs) || wc_is_ucs_digit(ucs));
    588 }
    589 
    590 wc_bool
    591 wc_is_ucs_lower(wc_uint32 ucs)
    592 {
    593     return (ucs <= WC_C_UCS2_END &&
    594 	wc_map_range_search((wc_uint16)ucs,
    595 	ucs_islower_map, N_ucs_islower_map) != NULL);
    596 }
    597 
    598 wc_bool
    599 wc_is_ucs_upper(wc_uint32 ucs)
    600 {
    601     return (ucs <= WC_C_UCS2_END &&
    602 	wc_map_range_search((wc_uint16)ucs,
    603 	ucs_isupper_map, N_ucs_isupper_map) != NULL);
    604 }
    605 
    606 wc_uint32
    607 wc_ucs_toupper(wc_uint32 ucs)
    608 {
    609     wc_map *conv = NULL;
    610     if (ucs <= WC_C_UCS2_END)
    611 	conv = wc_map_search((wc_uint16)ucs,
    612 			     ucs_toupper_map, N_ucs_toupper_map);
    613     return conv ? (wc_uint32)(conv->code2) : ucs;
    614 }
    615 
    616 wc_uint32
    617 wc_ucs_tolower(wc_uint32 ucs)
    618 {
    619     wc_map *conv = NULL;
    620     if (ucs <= WC_C_UCS2_END)
    621 	conv = wc_map_search((wc_uint16)ucs,
    622 			     ucs_tolower_map, N_ucs_tolower_map);
    623     return conv ? (wc_uint32)(conv->code2) : ucs;
    624 }
    625 
    626 wc_uint32
    627 wc_ucs_totitle(wc_uint32 ucs)
    628 {
    629     wc_map *conv = NULL;
    630     if (ucs <= WC_C_UCS2_END)
    631 	conv = wc_map_search((wc_uint16)ucs,
    632 			     ucs_totitle_map, N_ucs_totitle_map);
    633     return conv ? (wc_uint32)(conv->code2) : ucs;
    634 }
    635 
    636 wc_uint32
    637 wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2)
    638 {
    639     wc_map3 *map;
    640 
    641     if (WcOption.use_combining &&
    642 	ucs1 <= WC_C_UCS2_END && ucs2 <= WC_C_UCS2_END &&
    643 	(map = wc_map3_search((wc_uint16)ucs1, (wc_uint16)ucs2,
    644 	ucs_precompose_map, N_ucs_precompose_map)) != NULL)
    645 	return map->code3;
    646     return WC_C_UCS4_ERROR;
    647 }
    648 
    649 wc_uint32
    650 wc_ucs_to_fullwidth(wc_uint32 ucs)
    651 {
    652     wc_map *map;
    653 
    654     if (ucs <= WC_C_UCS2_END &&
    655 	(map = wc_map_search((wc_uint16)ucs,
    656 	ucs_fullwidth_map, N_ucs_fullwidth_map)) != NULL)
    657 	return map->code2;
    658     return WC_C_UCS4_ERROR;
    659 }
    660 
    661 int
    662 wc_ucs_put_tag(char *p)
    663 {
    664     int i;
    665 
    666     if (p == NULL || *p == '\0')
    667 	return 0;
    668     for (i = 1; i <= n_tag_map; i++) {
    669 	if (!strcasecmp(p, tag_map[i]))
    670 	    return i;
    671     }
    672     n_tag_map++;
    673     if (n_tag_map == MAX_TAG_MAP)
    674 	return 0;
    675     tag_map[n_tag_map] = p;
    676     return n_tag_map;
    677 }
    678 
    679 char *
    680 wc_ucs_get_tag(int ntag)
    681 {
    682     if (ntag == 0 || ntag > n_tag_map)
    683 	return NULL;
    684     return tag_map[ntag];
    685 }
    686 
    687 void
    688 wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st)
    689 {
    690     wc_ccs ccs;
    691 
    692     if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) {
    693 	if (! WcOption.use_language_tag)
    694 	    return;
    695 	if (ucs == WC_C_LANGUAGE_TAG)
    696 	    st->tag = Strnew_size(4);
    697 	else if (ucs == WC_C_CANCEL_TAG) {
    698 	    st->tag = NULL;
    699 	    st->ntag = 0;
    700 	}  else if (st->tag && ucs >= WC_C_TAG_SPACE)
    701 	    Strcat_char(st->tag, (char)(ucs & 0x7f));
    702 	return;
    703     }
    704     if (st->tag) {
    705 	st->ntag = wc_ucs_put_tag(st->tag->ptr);
    706 	st->tag = NULL;
    707     }
    708     if (ucs < 0x80) {
    709 	if (st->ntag)
    710 	    wtf_push(os, WC_CCS_UCS_TAG,  wc_ucs_to_ucs_tag(ucs, st->ntag));
    711 	else
    712 	    Strcat_char(os, (char)ucs);
    713     } else {
    714 	ccs = wc_ucs_to_ccs(ucs);
    715 	if (st->ntag && ucs <= WC_C_UNICODE_END) {
    716 	    ccs = wc_ccs_ucs_to_ccs_ucs_tag(ccs);
    717 	    ucs = wc_ucs_to_ucs_tag(ucs, st->ntag);
    718 	}
    719 	wtf_push(os, ccs, ucs);
    720     }
    721 }
    722 
    723 #endif