w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

conv.c (3247B)


      1 
      2 #include "wc.h"
      3 #include "wtf.h"
      4 #include "iso2022.h"
      5 #include "hz.h"
      6 #ifdef USE_UNICODE
      7 #include "ucs.h"
      8 #include "utf8.h"
      9 #include "utf7.h"
     10 #endif
     11 
/*
 * Replacement strings: a single '?' and a double "??".
 * NOTE(review): presumably substituted for characters that cannot be
 * represented in the target encoding (narrow vs. wide) -- their use is not
 * visible in this file; confirm against the push_to implementations.
 */
char *WcReplace = "?";
char *WcReplaceW = "??";

/* Forward declaration; defined further down and called by wc_Str_conv(). */
static Str wc_conv_to_ces(Str is, wc_ces ces);
     16 
     17 Str
     18 wc_Str_conv(Str is, wc_ces f_ces, wc_ces t_ces)
     19 {
     20     if (f_ces != WC_CES_WTF)
     21 	is = (*WcCesInfo[WC_CES_INDEX(f_ces)].conv_from)(is, f_ces);
     22     if (t_ces != WC_CES_WTF)
     23 	return wc_conv_to_ces(is, t_ces);
     24     else
     25 	return is;
     26 }
     27 
     28 Str
     29 wc_Str_conv_strict(Str is, wc_ces f_ces, wc_ces t_ces)
     30 {
     31     Str os;
     32     wc_option opt = WcOption;
     33 
     34     WcOption.strict_iso2022 = WC_TRUE;
     35     WcOption.no_replace = WC_TRUE;
     36     WcOption.fix_width_conv = WC_FALSE;
     37     os = wc_Str_conv(is, f_ces, t_ces);
     38     WcOption = opt;
     39     return os;
     40 }
     41 
     42 static Str
     43 wc_conv_to_ces(Str is, wc_ces ces)
     44 {
     45     Str os;
     46     wc_uchar *sp = (wc_uchar *)is->ptr;
     47     wc_uchar *ep = sp + is->length;
     48     wc_uchar *p;
     49     wc_status st;
     50 
     51     switch (ces) {
     52     case WC_CES_HZ_GB_2312:
     53 	for (p = sp; p < ep && *p != '~' && *p < 0x80; p++)
     54 	    ;
     55 	break;
     56     case WC_CES_TCVN_5712:
     57     case WC_CES_VISCII_11:
     58     case WC_CES_VPS:
     59 	for (p = sp; p < ep && 0x20 <= *p && *p < 0x80; p++)
     60 	    ;
     61 	break;
     62     default:
     63 	for (p = sp; p < ep && *p < 0x80; p++)
     64 	    ;
     65 	break;
     66     }
     67     if (p == ep)
     68 	return is;
     69 
     70     os = Strnew_size(is->length);
     71     if (p > sp)
     72 	p--;	/* for precompose */
     73     if (p > sp)
     74 	Strcat_charp_n(os, is->ptr, (int)(p - sp));
     75 
     76     wc_output_init(ces, &st);
     77 
     78     switch (ces) {
     79     case WC_CES_ISO_2022_JP:
     80     case WC_CES_ISO_2022_JP_2:
     81     case WC_CES_ISO_2022_JP_3:
     82     case WC_CES_ISO_2022_CN:
     83     case WC_CES_ISO_2022_KR:
     84     case WC_CES_HZ_GB_2312:
     85     case WC_CES_TCVN_5712:
     86     case WC_CES_VISCII_11:
     87     case WC_CES_VPS:
     88 #ifdef USE_UNICODE
     89     case WC_CES_UTF_8:
     90     case WC_CES_UTF_7:
     91 #endif
     92 	while (p < ep)
     93 	    (*st.ces_info->push_to)(os, wtf_parse(&p), &st);
     94 	break;
     95     default:
     96 	while (p < ep) {
     97 	    if (*p < 0x80 && wtf_width(p + 1)) {
     98 		Strcat_char(os, (char)*p);
     99 		p++;
    100 	    } else
    101 		(*st.ces_info->push_to)(os, wtf_parse(&p), &st);
    102 	}
    103 	break;
    104     }
    105 
    106     wc_push_end(os, &st);
    107 
    108     return os;
    109 }
    110 
    111 Str
    112 wc_Str_conv_with_detect(Str is, wc_ces *f_ces, wc_ces hint, wc_ces t_ces)
    113 {
    114     wc_ces detect;
    115 
    116     if (*f_ces == WC_CES_WTF || hint == WC_CES_WTF) {
    117 	*f_ces = WC_CES_WTF;
    118 	detect = WC_CES_WTF;
    119     } else if (WcOption.auto_detect == WC_OPT_DETECT_OFF) {
    120 	*f_ces = hint;
    121 	detect = hint;
    122     } else {
    123 	if (*f_ces & WC_CES_T_8BIT)
    124 	    hint = *f_ces;
    125 	detect = wc_auto_detect(is->ptr, is->length, hint);
    126 	if (WcOption.auto_detect == WC_OPT_DETECT_ON) {
    127 	    if ((detect & WC_CES_T_8BIT) ||
    128 		((detect & WC_CES_T_NASCII) && ! (*f_ces & WC_CES_T_8BIT)))
    129 		*f_ces = detect;
    130 	} else {
    131 	    if ((detect & WC_CES_T_ISO_2022) && ! (*f_ces & WC_CES_T_8BIT))
    132 		*f_ces = detect;
    133 	}
    134     }
    135     return wc_Str_conv(is, detect, t_ces);
    136 }
    137 
    138 void
    139 wc_push_end(Str os, wc_status *st)
    140 {
    141     if (st->ces_info->id & WC_CES_T_ISO_2022)
    142 	wc_push_to_iso2022_end(os, st);
    143     else if (st->ces_info->id == WC_CES_HZ_GB_2312)
    144 	wc_push_to_hz_end(os, st);
    145 #ifdef USE_UNICODE
    146     else if (st->ces_info->id == WC_CES_UTF_8)
    147 	wc_push_to_utf8_end(os, st);
    148     else if (st->ces_info->id == WC_CES_UTF_7)
    149 	wc_push_to_utf7_end(os, st);
    150 #endif
    151 }
    152