conv.c (3247B)
1 2 #include "wc.h" 3 #include "wtf.h" 4 #include "iso2022.h" 5 #include "hz.h" 6 #ifdef USE_UNICODE 7 #include "ucs.h" 8 #include "utf8.h" 9 #include "utf7.h" 10 #endif 11 12 char *WcReplace = "?"; 13 char *WcReplaceW = "??"; 14 15 static Str wc_conv_to_ces(Str is, wc_ces ces); 16 17 Str 18 wc_Str_conv(Str is, wc_ces f_ces, wc_ces t_ces) 19 { 20 if (f_ces != WC_CES_WTF) 21 is = (*WcCesInfo[WC_CES_INDEX(f_ces)].conv_from)(is, f_ces); 22 if (t_ces != WC_CES_WTF) 23 return wc_conv_to_ces(is, t_ces); 24 else 25 return is; 26 } 27 28 Str 29 wc_Str_conv_strict(Str is, wc_ces f_ces, wc_ces t_ces) 30 { 31 Str os; 32 wc_option opt = WcOption; 33 34 WcOption.strict_iso2022 = WC_TRUE; 35 WcOption.no_replace = WC_TRUE; 36 WcOption.fix_width_conv = WC_FALSE; 37 os = wc_Str_conv(is, f_ces, t_ces); 38 WcOption = opt; 39 return os; 40 } 41 42 static Str 43 wc_conv_to_ces(Str is, wc_ces ces) 44 { 45 Str os; 46 wc_uchar *sp = (wc_uchar *)is->ptr; 47 wc_uchar *ep = sp + is->length; 48 wc_uchar *p; 49 wc_status st; 50 51 switch (ces) { 52 case WC_CES_HZ_GB_2312: 53 for (p = sp; p < ep && *p != '~' && *p < 0x80; p++) 54 ; 55 break; 56 case WC_CES_TCVN_5712: 57 case WC_CES_VISCII_11: 58 case WC_CES_VPS: 59 for (p = sp; p < ep && 0x20 <= *p && *p < 0x80; p++) 60 ; 61 break; 62 default: 63 for (p = sp; p < ep && *p < 0x80; p++) 64 ; 65 break; 66 } 67 if (p == ep) 68 return is; 69 70 os = Strnew_size(is->length); 71 if (p > sp) 72 p--; /* for precompose */ 73 if (p > sp) 74 Strcat_charp_n(os, is->ptr, (int)(p - sp)); 75 76 wc_output_init(ces, &st); 77 78 switch (ces) { 79 case WC_CES_ISO_2022_JP: 80 case WC_CES_ISO_2022_JP_2: 81 case WC_CES_ISO_2022_JP_3: 82 case WC_CES_ISO_2022_CN: 83 case WC_CES_ISO_2022_KR: 84 case WC_CES_HZ_GB_2312: 85 case WC_CES_TCVN_5712: 86 case WC_CES_VISCII_11: 87 case WC_CES_VPS: 88 #ifdef USE_UNICODE 89 case WC_CES_UTF_8: 90 case WC_CES_UTF_7: 91 #endif 92 while (p < ep) 93 (*st.ces_info->push_to)(os, wtf_parse(&p), &st); 94 break; 95 default: 96 while (p < ep) { 97 if (*p < 0x80 && wtf_width(p + 1)) { 98 Strcat_char(os, (char)*p); 99 p++; 100 } else 101 (*st.ces_info->push_to)(os, wtf_parse(&p), &st); 102 } 103 break; 104 } 105 106 wc_push_end(os, &st); 107 108 return os; 109 } 110 111 Str 112 wc_Str_conv_with_detect(Str is, wc_ces *f_ces, wc_ces hint, wc_ces t_ces) 113 { 114 wc_ces detect; 115 116 if (*f_ces == WC_CES_WTF || hint == WC_CES_WTF) { 117 *f_ces = WC_CES_WTF; 118 detect = WC_CES_WTF; 119 } else if (WcOption.auto_detect == WC_OPT_DETECT_OFF) { 120 *f_ces = hint; 121 detect = hint; 122 } else { 123 if (*f_ces & WC_CES_T_8BIT) 124 hint = *f_ces; 125 detect = wc_auto_detect(is->ptr, is->length, hint); 126 if (WcOption.auto_detect == WC_OPT_DETECT_ON) { 127 if ((detect & WC_CES_T_8BIT) || 128 ((detect & WC_CES_T_NASCII) && ! (*f_ces & WC_CES_T_8BIT))) 129 *f_ces = detect; 130 } else { 131 if ((detect & WC_CES_T_ISO_2022) && ! (*f_ces & WC_CES_T_8BIT)) 132 *f_ces = detect; 133 } 134 } 135 return wc_Str_conv(is, detect, t_ces); 136 } 137 138 void 139 wc_push_end(Str os, wc_status *st) 140 { 141 if (st->ces_info->id & WC_CES_T_ISO_2022) 142 wc_push_to_iso2022_end(os, st); 143 else if (st->ces_info->id == WC_CES_HZ_GB_2312) 144 wc_push_to_hz_end(os, st); 145 #ifdef USE_UNICODE 146 else if (st->ces_info->id == WC_CES_UTF_8) 147 wc_push_to_utf8_end(os, st); 148 else if (st->ces_info->id == WC_CES_UTF_7) 149 wc_push_to_utf7_end(os, st); 150 #endif 151 } 152