hz.c (3539B)
1 2 #include "wc.h" 3 #include "iso2022.h" 4 #include "hz.h" 5 #include "wtf.h" 6 #ifdef USE_UNICODE 7 #include "ucs.h" 8 #endif 9 10 Str 11 wc_conv_from_hz(Str is, wc_ces ces) 12 { 13 Str os; 14 wc_uchar *sp = (wc_uchar *)is->ptr; 15 wc_uchar *ep = sp + is->length; 16 wc_uchar *p; 17 int state = WC_HZ_NOSTATE; 18 19 for (p = sp; p < ep && *p < 0x80 && *p != WC_C_HZ_TILDA; p++) 20 ; 21 if (p == ep) 22 return is; 23 os = Strnew_size(is->length); 24 if (p > sp) 25 Strcat_charp_n(os, is->ptr, (int)(p - sp)); 26 27 for (; p < ep; p++) { 28 switch (state) { 29 case WC_HZ_NOSTATE: 30 if (*p == WC_C_HZ_TILDA) 31 state = WC_HZ_TILDA; 32 else if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR) 33 state = WC_HZ_MBYTE1_GR; /* GB 2312 ? */ 34 else if (*p & 0x80) 35 wtf_push_unknown(os, p, 1); 36 else 37 Strcat_char(os, (char)*p); 38 break; 39 case WC_HZ_TILDA: 40 if (*p == WC_C_HZ_SI) { 41 state = WC_HZ_MBYTE; 42 break; 43 } else if (*p == WC_C_HZ_TILDA) 44 Strcat_char(os, (char)*p); 45 else if (*p == '\n') 46 break; 47 else 48 wtf_push_unknown(os, p-1, 2); 49 state = WC_HZ_NOSTATE; 50 break; 51 case WC_HZ_TILDA_MB: 52 if (*p == WC_C_HZ_SO || *p == '\n') { 53 state = WC_HZ_NOSTATE; 54 break; 55 } 56 else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL) 57 wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p); 58 else 59 wtf_push_unknown(os, p-1, 2); 60 state = WC_HZ_MBYTE; 61 break; 62 case WC_HZ_MBYTE: 63 if (*p == WC_C_HZ_TILDA) 64 state = WC_HZ_TILDA_MB; 65 else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL) 66 state = WC_HZ_MBYTE1; 67 else 68 wtf_push_unknown(os, p, 1); 69 break; 70 case WC_HZ_MBYTE1: 71 if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL) 72 wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p); 73 else 74 wtf_push_unknown(os, p-1, 2); 75 state = WC_HZ_MBYTE; 76 break; 77 case WC_HZ_MBYTE1_GR: 78 if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR) 79 wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p); 80 else 81 wtf_push_unknown(os, p-1, 2); 82 state = WC_HZ_NOSTATE; 83 break; 84 } 85 } 86 switch (state) { 87 case WC_HZ_TILDA: 88 case WC_HZ_TILDA_MB: 89 case WC_HZ_MBYTE1: 90 case WC_HZ_MBYTE1_GR: 91 wtf_push_unknown(os, p-1, 1); 92 break; 93 } 94 return os; 95 } 96 97 void 98 wc_push_to_hz(Str os, wc_wchar_t cc, wc_status *st) 99 { 100 while (1) { 101 switch (cc.ccs) { 102 case WC_CCS_US_ASCII: 103 if (st->gl) { 104 Strcat_char(os, WC_C_HZ_TILDA); 105 Strcat_char(os, WC_C_HZ_SO); 106 st->gl = 0; 107 } 108 if ((char)cc.code == WC_C_HZ_TILDA) 109 Strcat_char(os, WC_C_HZ_TILDA); 110 Strcat_char(os, (char)cc.code); 111 return; 112 case WC_CCS_GB_2312: 113 if (! st->gl) { 114 Strcat_char(os, WC_C_HZ_TILDA); 115 Strcat_char(os, WC_C_HZ_SI); 116 st->gl = 1; 117 } 118 Strcat_char(os, (char)((cc.code >> 8) & 0x7f)); 119 Strcat_char(os, (char)(cc.code & 0x7f)); 120 return; 121 case WC_CCS_UNKNOWN_W: 122 if (WcOption.no_replace) 123 return; 124 if (st->gl) { 125 Strcat_char(os, WC_C_HZ_TILDA); 126 Strcat_char(os, WC_C_HZ_SO); 127 st->gl = 0; 128 } 129 Strcat_charp(os, WC_REPLACE_W); 130 return; 131 case WC_CCS_UNKNOWN: 132 if (WcOption.no_replace) 133 return; 134 if (st->gl) { 135 Strcat_char(os, WC_C_HZ_TILDA); 136 Strcat_char(os, WC_C_HZ_SO); 137 st->gl = 0; 138 } 139 Strcat_charp(os, WC_REPLACE); 140 return; 141 default: 142 #ifdef USE_UNICODE 143 if (WcOption.ucs_conv) 144 cc = wc_any_to_any_ces(cc, st); 145 else 146 #endif 147 cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; 148 continue; 149 } 150 } 151 } 152 153 void 154 wc_push_to_hz_end(Str os, wc_status *st) 155 { 156 if (st->gl) { 157 Strcat_char(os, WC_C_HZ_TILDA); 158 Strcat_char(os, WC_C_HZ_SO); 159 st->gl = 0; 160 } 161 }