hkscs.c (4885B)
1 2 #include "wc.h" 3 #include "big5.h" 4 #include "hkscs.h" 5 #include "search.h" 6 #include "wtf.h" 7 #ifdef USE_UNICODE 8 #include "ucs.h" 9 #endif 10 11 #define C0 WC_HKSCS_MAP_C0 12 #define GL WC_HKSCS_MAP_GL 13 #define C1 WC_HKSCS_MAP_C1 14 #define LB WC_HKSCS_MAP_LB 15 #define UB WC_HKSCS_MAP_UB 16 #define UH WC_HKSCS_MAP_UH 17 18 wc_uint8 WC_HKSCS_MAP[ 0x100 ] = { 19 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 20 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 21 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 22 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 23 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 24 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 25 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 26 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0, 27 28 C1, C1, C1, C1, C1, C1, C1, C1, UH, UH, UH, UH, UH, UH, UH, UH, 29 UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, 30 UH, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 31 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 32 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 33 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 34 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 35 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1, 36 }; 37 38 wc_wchar_t 39 wc_hkscs_to_cs128w(wc_wchar_t cc) 40 { 41 cc.code = WC_HKSCS_N(cc.code); 42 if (cc.code < 0x4000) 43 cc.ccs = WC_CCS_HKSCS_1; 44 else { 45 cc.ccs = WC_CCS_HKSCS_2; 46 cc.code -= 0x4000; 47 } 48 cc.code = WC_N_CS128W(cc.code); 49 return cc; 50 } 51 52 wc_wchar_t 53 wc_cs128w_to_hkscs(wc_wchar_t cc) 54 { 55 cc.code = WC_CS128W_N(cc.code); 56 if (cc.ccs == WC_CCS_HKSCS_2) 57 cc.code += 0x4000; 58 cc.ccs = WC_CCS_HKSCS; 59 cc.code = WC_N_HKSCS(cc.code); 60 return cc; 61 } 62 63 wc_uint32 64 wc_hkscs_to_N(wc_uint32 c) 65 { 66 if (c < 0xA140) /* 0x8840 - 0xA0FE */ 67 return WC_HKSCS_N(c); 68 /* 0xFA40 - 0xFEFE */ 69 return WC_HKSCS_N(c) - 0x59 * 0x9D; 70 } 71 72 Str 73 wc_conv_from_hkscs(Str is, wc_ces ces) 74 { 75 Str os; 76 wc_uchar *sp = (wc_uchar *)is->ptr; 77 wc_uchar *ep = sp + is->length; 78 wc_uchar *p; 79 int state = WC_HKSCS_NOSTATE; 80 wc_uint32 hkscs; 81 82 for (p = sp; p < ep && *p < 0x80; p++) 83 ; 84 if (p == ep) 85 return is; 86 os = Strnew_size(is->length); 87 if (p > sp) 88 Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp)); 89 90 for (; p < ep; p++) { 91 switch (state) { 92 case WC_HKSCS_NOSTATE: 93 switch (WC_HKSCS_MAP[*p]) { 94 case UB: 95 case UH: 96 state = WC_HKSCS_MBYTE1; 97 break; 98 case C1: 99 wtf_push_unknown(os, p, 1); 100 break; 101 default: 102 Strcat_char(os, (char)*p); 103 break; 104 } 105 break; 106 case WC_HKSCS_MBYTE1: 107 if (WC_HKSCS_MAP[*p] & LB) { 108 hkscs = ((wc_uint32)*(p-1) << 8) | *p; 109 if (*(p-1) >= 0xA1 && *(p-1) <= 0xF9) 110 wtf_push(os, WC_CCS_BIG5, hkscs); 111 else 112 wtf_push(os, WC_CCS_HKSCS, hkscs); 113 } else 114 wtf_push_unknown(os, p-1, 2); 115 state = WC_HKSCS_NOSTATE; 116 break; 117 } 118 } 119 switch (state) { 120 case WC_HKSCS_MBYTE1: 121 wtf_push_unknown(os, p-1, 1); 122 break; 123 } 124 return os; 125 } 126 127 void 128 wc_push_to_hkscs(Str os, wc_wchar_t cc, wc_status *st) 129 { 130 while (1) { 131 switch (cc.ccs) { 132 case WC_CCS_US_ASCII: 133 Strcat_char(os, (char)cc.code); 134 return; 135 case WC_CCS_BIG5_1: 136 case WC_CCS_BIG5_2: 137 cc = wc_cs94w_to_big5(cc); 138 case WC_CCS_BIG5: 139 Strcat_char(os, (char)(cc.code >> 8)); 140 Strcat_char(os, (char)(cc.code & 0xff)); 141 return; 142 case WC_CCS_HKSCS_1: 143 case WC_CCS_HKSCS_2: 144 cc = wc_cs128w_to_hkscs(cc); 145 case WC_CCS_HKSCS: 146 Strcat_char(os, (char)(cc.code >> 8)); 147 Strcat_char(os, (char)(cc.code & 0xff)); 148 return; 149 case WC_CCS_UNKNOWN_W: 150 if (!WcOption.no_replace) 151 Strcat_charp(os, WC_REPLACE_W); 152 return; 153 case WC_CCS_UNKNOWN: 154 if (!WcOption.no_replace) 155 Strcat_charp(os, WC_REPLACE); 156 return; 157 default: 158 #ifdef USE_UNICODE 159 if (WcOption.ucs_conv) 160 cc = wc_any_to_any_ces(cc, st); 161 else 162 #endif 163 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 164 continue; 165 } 166 } 167 } 168 169 Str 170 wc_char_conv_from_hkscs(wc_uchar c, wc_status *st) 171 { 172 static Str os; 173 static wc_uchar hkscsu; 174 wc_uint32 hkscs; 175 176 if (st->state == -1) { 177 st->state = WC_HKSCS_NOSTATE; 178 os = Strnew_size(8); 179 } 180 181 switch (st->state) { 182 case WC_HKSCS_NOSTATE: 183 switch (WC_HKSCS_MAP[c]) { 184 case UB: 185 case UH: 186 hkscsu = c; 187 st->state = WC_HKSCS_MBYTE1; 188 return NULL; 189 case C1: 190 break; 191 default: 192 Strcat_char(os, (char)c); 193 break; 194 } 195 break; 196 case WC_HKSCS_MBYTE1: 197 if (WC_HKSCS_MAP[c] & LB) { 198 hkscs = ((wc_uint32)hkscsu << 8) | c; 199 if (hkscsu >= 0xA1 && hkscsu <= 0xF9 && c >= 0xA1) 200 wtf_push(os, WC_CCS_BIG5, hkscs); 201 else 202 wtf_push(os, WC_CCS_HKSCS, hkscs); 203 } 204 break; 205 } 206 st->state = -1; 207 return os; 208 }