ces.c (14732B)
1 2 #include "wc.h" 3 #include "iso2022.h" 4 #include "sjis.h" 5 #include "hz.h" 6 #include "big5.h" 7 #include "hkscs.h" 8 #include "johab.h" 9 #include "gbk.h" 10 #include "gb18030.h" 11 #include "uhc.h" 12 #include "viet.h" 13 #include "priv.h" 14 #ifdef USE_UNICODE 15 #include "utf8.h" 16 #include "utf7.h" 17 #endif 18 19 static wc_gset gset_usascii[] = { 20 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 21 { 0, 0, 0 }, 22 }; 23 24 #define gset_iso8859(no) \ 25 static wc_gset gset_iso8859##no[] = { \ 26 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, \ 27 { WC_CCS_ISO_8859_##no, WC_C_G1_CS96 | 0x80, 1 }, \ 28 { 0, 0, 0 }, \ 29 } 30 gset_iso8859(1); gset_iso8859(2); gset_iso8859(3); gset_iso8859(4); 31 gset_iso8859(5); gset_iso8859(6); gset_iso8859(7); gset_iso8859(8); 32 gset_iso8859(9); gset_iso8859(10); gset_iso8859(11); 33 gset_iso8859(13); gset_iso8859(14); gset_iso8859(15); gset_iso8859(16); 34 35 #define gset_cp(no) gset_priv1(CP##no, cp##no) 36 #define gset_priv1(ccs, ces) \ 37 static wc_gset gset_##ces[] = { \ 38 { WC_CCS_US_ASCII, 0, 1 }, \ 39 { WC_CCS_##ccs, 0x80, 1 }, \ 40 { 0, 0, 0 }, \ 41 } 42 gset_cp(437); gset_cp(737); gset_cp(775); gset_cp(850); gset_cp(852); 43 gset_cp(855); gset_cp(856); gset_cp(857); gset_cp(860); gset_cp(861); 44 gset_cp(862); gset_cp(863); gset_cp(864); gset_cp(865); gset_cp(866); 45 gset_cp(869); gset_cp(874); gset_cp(1006); 46 gset_cp(1250); gset_cp(1251); gset_cp(1252); gset_cp(1253); gset_cp(1254); 47 gset_cp(1255); gset_cp(1256); gset_cp(1257); 48 gset_priv1(KOI8_R, koi8r); 49 gset_priv1(KOI8_U, koi8u); 50 gset_priv1(NEXTSTEP, nextstep); 51 52 static wc_gset gset_iso2022jp[] = { 53 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 54 { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 }, 55 { 0, 0, 0 }, 56 }; 57 static wc_gset gset_iso2022jp2[] = { 58 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 59 { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 }, 60 { WC_CCS_JIS_X_0212, WC_C_G0_CS94, 0 }, 61 { WC_CCS_GB_2312, WC_C_G0_CS94, 0 }, 62 { WC_CCS_KS_X_1001, WC_C_G0_CS94, 0 }, 63 { WC_CCS_ISO_8859_1, WC_C_G2_CS96, 0 }, 64 { WC_CCS_ISO_8859_7, WC_C_G2_CS96, 0 }, 65 { 0, 0, 0 }, 66 }; 67 static wc_gset gset_iso2022jp3[] = { 68 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 69 { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 }, 70 { WC_CCS_JIS_X_0213_1, WC_C_G0_CS94, 0 }, 71 { WC_CCS_JIS_X_0213_2, WC_C_G0_CS94, 0 }, 72 { 0, 0, 0 }, 73 }; 74 static wc_gset gset_iso2022cn[] = { 75 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 76 { WC_CCS_GB_2312, WC_C_G1_CS94, 1 }, 77 { WC_CCS_ISO_IR_165, WC_C_G1_CS94, 0 }, 78 { WC_CCS_CNS_11643_1, WC_C_G1_CS94, 0 }, 79 { WC_CCS_CNS_11643_2, WC_C_G2_CS94, 0 }, 80 { WC_CCS_CNS_11643_3, WC_C_G3_CS94, 0 }, 81 { WC_CCS_CNS_11643_4, WC_C_G3_CS94, 0 }, 82 { WC_CCS_CNS_11643_5, WC_C_G3_CS94, 0 }, 83 { WC_CCS_CNS_11643_6, WC_C_G3_CS94, 0 }, 84 { WC_CCS_CNS_11643_7, WC_C_G3_CS94, 0 }, 85 { 0, 0, 0 }, 86 }; 87 static wc_gset gset_iso2022kr[] = { 88 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 89 { WC_CCS_KS_X_1001, WC_C_G1_CS94, 1 }, 90 { 0, 0, 0 }, 91 }; 92 static wc_uchar gset_ext_iso2022jp[] = { 93 WC_C_G0_CS94, WC_C_G2_CS96, WC_C_G0_CS94, WC_C_G2_CS96 94 }; 95 static wc_uchar gset_ext_iso2022cn[] = { 96 WC_C_G2_CS94, WC_C_G2_CS96, WC_C_G2_CS94, WC_C_G2_CS96 97 }; 98 static wc_uchar gset_ext_iso2022kr[] = { 99 WC_C_G1_CS94, WC_C_G1_CS96, WC_C_G1_CS94, WC_C_G1_CS96 100 }; 101 static wc_gset gset_eucjp[] = { 102 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 103 { WC_CCS_JIS_X_0208, WC_C_G1_CS94 | 0x80, 1 }, 104 { WC_CCS_JIS_X_0201K, WC_C_G2_CS94 | 0x80, 1 }, 105 { WC_CCS_JIS_X_0213_1, WC_C_G1_CS94 | 0x80, 0 }, 106 { WC_CCS_JIS_X_0213_2, WC_C_G3_CS94 | 0x80, 0 }, 107 { WC_CCS_JIS_X_0212, WC_C_G3_CS94 | 0x80, 1 }, 108 { 0, 0, 0 }, 109 }; 110 static wc_gset gset_euccn[] = { 111 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 112 { WC_CCS_GB_2312, WC_C_G1_CS94 | 0x80, 1 }, 113 { 0, 0, 0 }, 114 }; 115 static wc_gset gset_euctw[] = { 116 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 117 { WC_CCS_CNS_11643_1, WC_C_G1_CS94 | 0x80, 1 }, 118 { WC_CCS_CNS_11643_X, WC_C_G2_CS94 | 0x80, 1 }, 119 { 0, 0, 0 }, 120 }; 121 static wc_gset gset_euckr[] = { 122 { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, 123 { WC_CCS_KS_X_1001, WC_C_G1_CS94 | 0x80, 1 }, 124 { 0, 0, 0 }, 125 }; 126 static wc_gset gset_sjis[] = { 127 { WC_CCS_US_ASCII, 0, 1 }, 128 { WC_CCS_JIS_X_0208, 0x80, 1 }, 129 { WC_CCS_JIS_X_0201K, 0x80, 1 }, 130 { WC_CCS_SJIS_EXT_1, 0x80, 1 }, 131 { WC_CCS_SJIS_EXT_2, 0x80, 1 }, 132 { WC_CCS_SJIS_EXT, 0x80, 1 }, 133 { 0, 0, 0 }, 134 }; 135 static wc_gset gset_sjisx0213[] = { 136 { WC_CCS_US_ASCII, 0, 1 }, 137 { WC_CCS_JIS_X_0208, 0x80, 1 }, 138 { WC_CCS_JIS_X_0201K, 0x80, 1 }, 139 { WC_CCS_JIS_X_0213_1, 0x80, 1 }, 140 { WC_CCS_JIS_X_0213_2, 0x80, 1 }, 141 { 0, 0, 0 }, 142 }; 143 static wc_gset gset_hz[] = { 144 { WC_CCS_US_ASCII, 0, 1 }, 145 { WC_CCS_GB_2312, 0, 0 }, 146 { 0, 0, 0 }, 147 }; 148 static wc_gset gset_big5[] = { 149 { WC_CCS_US_ASCII, 0, 1 }, 150 { WC_CCS_BIG5_1, 0x80, 1 }, 151 { WC_CCS_BIG5_2, 0x80, 1 }, 152 { WC_CCS_BIG5, 0x80, 1 }, 153 { 0, 0, 0 }, 154 }; 155 static wc_gset gset_hkscs[] = { 156 { WC_CCS_US_ASCII, 0, 1 }, 157 { WC_CCS_BIG5_1, 0x80, 1 }, 158 { WC_CCS_BIG5_2, 0x80, 1 }, 159 { WC_CCS_BIG5, 0x80, 1 }, 160 { WC_CCS_HKSCS_1, 0x80, 1 }, 161 { WC_CCS_HKSCS_2, 0x80, 1 }, 162 { WC_CCS_HKSCS, 0x80, 1 }, 163 { 0, 0, 0 }, 164 }; 165 static wc_gset gset_johab[] = { 166 { WC_CCS_US_ASCII, 0, 1 }, 167 { WC_CCS_JOHAB_1, 0x80, 1 }, 168 { WC_CCS_JOHAB_2, 0x80, 1 }, 169 { WC_CCS_JOHAB_3, 0x80, 1 }, 170 { WC_CCS_JOHAB, 0x80, 1 }, 171 { 0, 0, 0 }, 172 }; 173 static wc_gset gset_gbk[] = { 174 { WC_CCS_US_ASCII, 0, 1 }, 175 { WC_CCS_GB_2312, 0x80, 1 }, 176 { WC_CCS_GBK_80, 0x80, 1 }, 177 { WC_CCS_GBK_1, 0x80, 1 }, 178 { WC_CCS_GBK_2, 0x80, 1 }, 179 { WC_CCS_GBK, 0x80, 1 }, 180 { 0, 0, 0 }, 181 }; 182 static wc_gset gset_gb18030[] = { 183 { WC_CCS_US_ASCII, 0, 1 }, 184 { WC_CCS_GB_2312, 0x80, 1 }, 185 { WC_CCS_GBK_1, 0x80, 1 }, 186 { WC_CCS_GBK_2, 0x80, 1 }, 187 { WC_CCS_GBK, 0x80, 1 }, 188 { WC_CCS_GBK_EXT_1, 0x80, 1 }, 189 { WC_CCS_GBK_EXT_2, 0x80, 1 }, 190 { WC_CCS_GBK_EXT, 0x80, 1 }, 191 { WC_CCS_GB18030, 0x80, 1 }, 192 { 0, 0, 0 }, 193 }; 194 static wc_gset gset_uhc[] = { 195 { WC_CCS_US_ASCII, 0, 1 }, 196 { WC_CCS_KS_X_1001, 0x80, 1 }, 197 { WC_CCS_UHC_1, 0x80, 1 }, 198 { WC_CCS_UHC_2, 0x80, 1 }, 199 { WC_CCS_UHC, 0x80, 1 }, 200 { 0, 0, 0 }, 201 }; 202 #define gset_priv2(ccs, ces) \ 203 static wc_gset gset_##ces[] = { \ 204 { WC_CCS_US_ASCII, 0, 1 }, \ 205 { WC_CCS_##ccs##_1, 0x80, 1 }, \ 206 { WC_CCS_##ccs##_2, 0x80, 1 }, \ 207 { 0, 0, 0 }, \ 208 } 209 gset_priv2(CP1258, cp1258); 210 gset_priv2(VISCII_11, viscii11); 211 gset_priv2(VPS, vps); 212 static wc_gset gset_tcvn5712[] = { 213 { WC_CCS_US_ASCII, 0, 1 }, 214 { WC_CCS_TCVN_5712_1, 0x80, 1 }, 215 { WC_CCS_TCVN_5712_2, 0x80, 1 }, 216 { WC_CCS_TCVN_5712_3, 0x80, 1 }, 217 { 0, 0, 0 }, 218 }; 219 220 #ifdef USE_UNICODE 221 static wc_gset gset_utf8[] = { 222 { WC_CCS_US_ASCII, 0, 1 }, 223 { WC_CCS_UCS2, 0x80, 1 }, 224 { WC_CCS_UCS4, 0x80, 1 }, 225 { WC_CCS_UCS_TAG, 0x80, 1 }, 226 { 0, 0, 0 }, 227 }; 228 static wc_gset gset_utf7[] = { 229 { WC_CCS_US_ASCII, 0, 1 }, 230 { WC_CCS_UCS2, 0x80, 1 }, 231 { WC_CCS_UCS4, 0x80, 1 }, 232 { WC_CCS_UCS_TAG, 0x80, 1 }, 233 { 0, 0, 0 }, 234 }; 235 #endif 236 237 static wc_gset gset_raw[] = { 238 { WC_CCS_US_ASCII, 0, 1 }, 239 { WC_CCS_RAW, 0x80, 1 }, 240 { 0, 0, 0 }, 241 }; 242 243 #define ces_ascii(id,name,desc) \ 244 { WC_CES_##id, name, desc, gset_usascii, NULL, \ 245 (void *)wc_conv_from_ascii, (void *)wc_push_to_iso8859, \ 246 (void *)wc_char_conv_from_iso2022 } 247 #define ces_iso8859(id,name,desc,no) \ 248 { WC_CES_##id, name, desc, gset_iso8859##no, NULL, \ 249 (void *)wc_conv_from_iso2022, (void *)wc_push_to_iso8859, \ 250 (void *)wc_char_conv_from_iso2022 } 251 #define ces_priv1(id,name,desc,ces) \ 252 { WC_CES_##id, name, desc, gset_##ces, NULL, \ 253 (void *)wc_conv_from_priv1, (void *)wc_push_to_priv1, \ 254 (void *)wc_char_conv_from_priv1 } 255 #define ces_iso2022(id,name,desc,terr) \ 256 { WC_CES_##id, name, desc, gset_iso2022##terr, gset_ext_iso2022##terr, \ 257 (void *)wc_conv_from_iso2022, (void *)wc_push_to_iso2022, \ 258 (void *)wc_char_conv_from_iso2022 } 259 #define ces_euc(id,name,desc,terr) \ 260 { WC_CES_##id, name, desc, gset_euc##terr, NULL, \ 261 (void *)wc_conv_from_iso2022, (void *)wc_push_to_euc##terr, \ 262 (void *)wc_char_conv_from_iso2022 } 263 #define ces_priv2(id,name,desc,ces) \ 264 { WC_CES_##id, name, desc, gset_##ces, NULL, \ 265 (void *)wc_conv_from_##ces, (void *)wc_push_to_##ces, \ 266 (void *)wc_char_conv_from_##ces } 267 268 #define gset_ext_iso2022jp2 gset_ext_iso2022jp 269 #define gset_ext_iso2022jp3 gset_ext_iso2022jp 270 #define wc_push_to_euckr wc_push_to_euc 271 #define wc_push_to_euccn wc_push_to_euc 272 #define wc_push_to_priv1 wc_push_to_iso8859 273 #define wc_push_to_cp1258 wc_push_to_viet 274 #define wc_push_to_tcvn5712 wc_push_to_viet 275 #define wc_push_to_viscii11 wc_push_to_viet 276 #define wc_push_to_vps wc_push_to_viet 277 #define wc_conv_from_cp1258 wc_conv_from_priv1 278 #define wc_conv_from_tcvn5712 wc_conv_from_viet 279 #define wc_conv_from_viscii11 wc_conv_from_viet 280 #define wc_conv_from_vps wc_conv_from_viet 281 #define wc_conv_from_raw wc_conv_from_priv1 282 #define wc_char_conv_from_hz wc_char_conv_from_iso2022 283 #define wc_char_conv_from_cp1258 wc_char_conv_from_priv1 284 #define wc_char_conv_from_tcvn5712 wc_char_conv_from_viet 285 #define wc_char_conv_from_viscii11 wc_char_conv_from_viet 286 #define wc_char_conv_from_vps wc_char_conv_from_viet 287 #define wc_char_conv_from_raw wc_char_conv_from_priv1 288 289 wc_ces_info WcCesInfo[] = { 290 ces_ascii(US_ASCII, "US-ASCII", "Latin (US-ASCII)"), 291 292 ces_iso8859(ISO_8859_1, "ISO-8859-1", "Latin 1 (ISO-8859-1)", 1), 293 ces_iso8859(ISO_8859_2, "ISO-8859-2", "Latin 2 (ISO-8859-2)", 2), 294 ces_iso8859(ISO_8859_3, "ISO-8859-3", "Latin 3 (ISO-8859-3)", 3), 295 ces_iso8859(ISO_8859_4, "ISO-8859-4", "Latin 4 (ISO-8859-4)", 4), 296 ces_iso8859(ISO_8859_5, "ISO-8859-5", "Cyrillic (ISO-8859-5)", 5), 297 ces_iso8859(ISO_8859_6, "ISO-8859-6", "Arabic (ISO-8859-6)", 6), 298 ces_iso8859(ISO_8859_7, "ISO-8859-7", "Greek (ISO-8859-7)", 7), 299 ces_iso8859(ISO_8859_8, "ISO-8859-8", "Hebrew (ISO-8859-8)", 8), 300 ces_iso8859(ISO_8859_9, "ISO-8859-9", "Turkish (ISO-8859-9)", 9), 301 ces_iso8859(ISO_8859_10, "ISO-8859-10", "Nordic (ISO-8859-10)", 10), 302 ces_iso8859(ISO_8859_11, "ISO-8859-11", "Thai (ISO-8859-11, TIS-620)", 11), 303 { WC_CES_ISO_8859_12, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, 304 ces_iso8859(ISO_8859_13, "ISO-8859-13", "Baltic Rim (ISO-8859-13)", 13), 305 ces_iso8859(ISO_8859_14, "ISO-8859-14", "Celtic (ISO-8859-14)", 14), 306 ces_iso8859(ISO_8859_15, "ISO-8859-15", "Latin 9 (ISO-8859-15)", 15), 307 ces_iso8859(ISO_8859_16, "ISO-8859-16", "Romanian (ISO-8859-16)", 16), 308 309 ces_iso2022(ISO_2022_JP, "ISO-2022-JP", "Japanese (ISO-2022-JP)", jp), 310 ces_iso2022(ISO_2022_JP_2, "ISO-2022-JP-2", "Japanese (ISO-2022-JP-2)", jp2), 311 ces_iso2022(ISO_2022_JP_3, "ISO-2022-JP-3", "Japanese (ISO-2022-JP-3)", jp3), 312 ces_iso2022(ISO_2022_CN, "ISO-2022-CN", "Chinese (ISO-2022-CN)", cn), 313 ces_iso2022(ISO_2022_KR, "ISO-2022-KR", "Korean (ISO-2022-KR)", kr), 314 315 ces_euc(EUC_JP, "EUC-JP", "Japanese (EUC-JP)", jp), 316 ces_euc(EUC_CN, "EUC-CN", "Chinese (EUC-CN, GB2312)", cn), 317 ces_euc(EUC_TW, "EUC-TW", "Chinese Taiwan (EUC-TW)", tw), 318 ces_euc(EUC_KR, "EUC-KR", "Korean (EUC-KR)", kr), 319 320 ces_priv1(CP437, "CP437", "Latin (CP437)", cp437), 321 ces_priv1(CP737, "CP737", "Greek (CP737)", cp737), 322 ces_priv1(CP775, "CP775", "Baltic Rim (CP775)", cp775), 323 ces_priv1(CP850, "CP850", "Latin 1 (CP850)", cp850), 324 ces_priv1(CP852, "CP852", "Latin 2 (CP852)", cp852), 325 ces_priv1(CP855, "CP855", "Cyrillic (CP855)", cp855), 326 ces_priv1(CP856, "CP856", "Hebrew (CP856)", cp856), 327 ces_priv1(CP857, "CP857", "Turkish (CP857)", cp857), 328 ces_priv1(CP860, "CP860", "Portuguese (CP860)", cp860), 329 ces_priv1(CP861, "CP861", "Icelandic (CP861)", cp861), 330 ces_priv1(CP862, "CP862", "Hebrew (CP862)", cp862), 331 ces_priv1(CP863, "CP863", "Canada French (CP863)", cp863), 332 ces_priv1(CP864, "CP864", "Arabic (CP864)", cp864), 333 ces_priv1(CP865, "CP865", "Nordic (CP865)", cp865), 334 ces_priv1(CP866, "CP866", "Cyrillic (CP866)", cp866), 335 ces_priv1(CP869, "CP869", "Greek 2 (CP869)", cp869), 336 ces_priv1(CP874, "CP874", "Thai (CP874)", cp874), 337 ces_priv1(CP1006, "CP1006", "Arabic (CP1006)", cp1006), 338 ces_priv1(CP1250, "CP1250", "Latin 2 (CP1250)", cp1250), 339 ces_priv1(CP1251, "CP1251", "Cyrillic (CP1251)", cp1251), 340 ces_priv1(CP1252, "CP1252", "Latin 1 (CP1252)", cp1252), 341 ces_priv1(CP1253, "CP1253", "Greek (CP1253)", cp1253), 342 ces_priv1(CP1254, "CP1254", "Turkish (CP1254)", cp1254), 343 ces_priv1(CP1255, "CP1255", "Hebrew (CP1255)", cp1255), 344 ces_priv1(CP1256, "CP1256", "Arabic (CP1256)", cp1256), 345 ces_priv1(CP1257, "CP1257", "Baltic Rim (CP1257)", cp1257), 346 ces_priv1(KOI8_R, "KOI8-R", "Cyrillic (KOI8-R)", koi8r), 347 ces_priv1(KOI8_U, "KOI8-U", "Ukrainian (KOI8-U)", koi8u), 348 ces_priv1(NEXTSTEP, "NeXTSTEP", "NeXTSTEP", nextstep), 349 350 ces_priv2(RAW, "Raw", "8bit Raw", raw), 351 352 ces_priv2(SHIFT_JIS, "Shift_JIS", "Japanese (Shift_JIS, CP932)", sjis), 353 ces_priv2(SHIFT_JISX0213, "Shift_JISX0213", "Japanese (Shift_JISX0213)", sjisx0213), 354 ces_priv2(GBK, "GBK", "Chinese (GBK, CP936)", gbk), 355 ces_priv2(GB18030, "GB18030", "Chinese (GB18030)", gb18030), 356 ces_priv2(HZ_GB_2312, "HZ-GB-2312", "Chinese (HZ-GB-2312)", hz), 357 ces_priv2(BIG5, "Big5", "Chinese Taiwan (Big5, CP950)", big5), 358 ces_priv2(HKSCS, "HKSCS", "Chinese Hong Kong (HKSCS)", hkscs), 359 ces_priv2(UHC, "UHC", "Korean (UHC, CP949)", uhc), 360 ces_priv2(JOHAB, "Johab", "Korean (Johab)", johab), 361 362 ces_priv2(CP1258, "CP1258", "Vietnamese (CP1258)", cp1258), 363 ces_priv2(TCVN_5712, "TCVN-5712", "Vietnamese (TCVN-5712)", tcvn5712), 364 ces_priv2(VISCII_11, "VISCII-1.1", "Vietnamese (VISCII 1.1)", viscii11), 365 ces_priv2(VPS, "VPS", "Vietnamese (VPS)", vps), 366 367 #ifdef USE_UNICODE 368 ces_priv2(UTF_8, "UTF-8", "Unicode (UTF-8)", utf8), 369 ces_priv2(UTF_7, "UTF-7", "Unicode (UTF-7)", utf7), 370 #else 371 { WC_CES_UTF_8, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, 372 { WC_CES_UTF_7, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, 373 #endif 374 { 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, 375 };