big5.c (4157B)
1 2 #include "wc.h" 3 #include "big5.h" 4 #include "search.h" 5 #include "wtf.h" 6 #ifdef USE_UNICODE 7 #include "ucs.h" 8 #endif 9 10 #define C0 WC_BIG5_MAP_C0 11 #define GL WC_BIG5_MAP_GL 12 #define C1 WC_BIG5_MAP_C1 13 #define LB WC_BIG5_MAP_LB 14 #define UB WC_BIG5_MAP_UB 15 16 wc_uint8 WC_BIG5_MAP[ 0x100 ] = { 17 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 18 C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, 19 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 20 GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, 21 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 22 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 23 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, 24 LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0, 25 26 C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, 27 C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, 28 C1, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 29 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 30 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 31 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 32 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, 33 UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1, 34 }; 35 36 wc_wchar_t 37 wc_big5_to_cs94w(wc_wchar_t cc) 38 { 39 cc.code = WC_BIG5_N(cc.code); 40 if (cc.code < WC_C_BIG5_2_BASE) 41 cc.ccs = WC_CCS_BIG5_1; 42 else { 43 cc.ccs = WC_CCS_BIG5_2; 44 cc.code -= WC_C_BIG5_2_BASE; 45 } 46 cc.code = WC_N_CS94W(cc.code); 47 return cc; 48 } 49 50 wc_wchar_t 51 wc_cs94w_to_big5(wc_wchar_t cc) 52 { 53 cc.code = WC_CS94W_N(cc.code); 54 if (cc.ccs == WC_CCS_BIG5_2) 55 cc.code += WC_C_BIG5_2_BASE; 56 cc.code = WC_N_BIG5(cc.code); 57 cc.ccs = WC_CCS_BIG5; 58 return cc; 59 } 60 61 Str 62 wc_conv_from_big5(Str is, wc_ces ces) 63 { 64 Str os; 65 wc_uchar *sp = (wc_uchar *)is->ptr; 66 wc_uchar *ep = sp + is->length; 67 wc_uchar *p; 68 int state = WC_BIG5_NOSTATE; 69 70 for (p = sp; p < ep && *p < 0x80; p++) 71 ; 72 if (p == ep) 73 return is; 74 os = Strnew_size(is->length); 75 if (p > sp) 76 Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp)); 77 78 for (; p < ep; p++) { 79 switch (state) { 80 case WC_BIG5_NOSTATE: 81 switch (WC_BIG5_MAP[*p]) { 82 case UB: 83 state = WC_BIG5_MBYTE1; 84 break; 85 case C1: 86 wtf_push_unknown(os, p, 1); 87 break; 88 default: 89 Strcat_char(os, (char)*p); 90 break; 91 } 92 break; 93 case WC_BIG5_MBYTE1: 94 if (WC_BIG5_MAP[*p] & LB) 95 wtf_push(os, WC_CCS_BIG5, ((wc_uint32)*(p-1) << 8) | *p); 96 else 97 wtf_push_unknown(os, p-1, 2); 98 state = WC_BIG5_NOSTATE; 99 break; 100 } 101 } 102 switch (state) { 103 case WC_BIG5_MBYTE1: 104 wtf_push_unknown(os, p-1, 1); 105 break; 106 } 107 return os; 108 } 109 110 void 111 wc_push_to_big5(Str os, wc_wchar_t cc, wc_status *st) 112 { 113 while (1) { 114 switch (cc.ccs) { 115 case WC_CCS_US_ASCII: 116 Strcat_char(os, (char)cc.code); 117 return; 118 case WC_CCS_BIG5_1: 119 case WC_CCS_BIG5_2: 120 cc = wc_cs94w_to_big5(cc); 121 case WC_CCS_BIG5: 122 Strcat_char(os, (char)(cc.code >> 8)); 123 Strcat_char(os, (char)(cc.code & 0xff)); 124 return; 125 case WC_CCS_UNKNOWN_W: 126 if (!WcOption.no_replace) 127 Strcat_charp(os, WC_REPLACE_W); 128 return; 129 case WC_CCS_UNKNOWN: 130 if (!WcOption.no_replace) 131 Strcat_charp(os, WC_REPLACE); 132 return; 133 default: 134 #ifdef USE_UNICODE 135 if (WcOption.ucs_conv) 136 cc = wc_any_to_any_ces(cc, st); 137 else 138 #endif 139 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 140 continue; 141 } 142 } 143 } 144 145 Str 146 wc_char_conv_from_big5(wc_uchar c, wc_status *st) 147 { 148 static Str os; 149 static wc_uchar big5u; 150 151 if (st->state == -1) { 152 st->state = WC_BIG5_NOSTATE; 153 os = Strnew_size(8); 154 } 155 156 switch (st->state) { 157 case WC_BIG5_NOSTATE: 158 switch (WC_BIG5_MAP[c]) { 159 case UB: 160 big5u = c; 161 st->state = WC_BIG5_MBYTE1; 162 return NULL; 163 case C1: 164 break; 165 default: 166 Strcat_char(os, (char)c); 167 break; 168 } 169 break; 170 case WC_BIG5_MBYTE1: 171 if (WC_BIG5_MAP[c] & LB) 172 wtf_push(os, WC_CCS_BIG5, ((wc_uint32)big5u << 8) | c); 173 break; 174 } 175 st->state = -1; 176 return os; 177 }