mk_gbk_ucs_map.pl (2366B)
use strict;
use warnings;

# Generates C conversion tables from a code-page mapping file.
#
# Each non-empty line after __END__ has three whitespace-separated fields:
#
#   <name> <mapping file> <description>
#
# For every such entry the mapping file is read and "<name>_ucs.map" is
# written in the current directory.  The output holds:
#
#   * a fixed one-entry table ucs_gbk_80_map,
#   * <name>_ucs_map: one Unicode value per double-byte code, in code order,
#   * ucs_<name>_map: { Unicode, code } pairs sorted by Unicode value.
#
# The script dies with a message if a file cannot be opened or closed, so a
# missing mapping file no longer yields a silently all-zero table.

# Returns true when a double-byte code must be left out of the tables.
# Codes whose lead and trail bytes are both in 0xA1..0xFE are dropped,
# except for the few ranges listed below.
# NOTE(review): presumably that area is served by a separate table; confirm.
sub is_skipped_code {
    my ($gbk) = @_;

    my $lead  = $gbk >> 8;
    my $trail = $gbk & 0x00FF;

    return 0
        unless $lead >= 0xA1 && $lead <= 0xFE
            && $trail >= 0xA1 && $trail <= 0xFE;

    # Exceptions that are kept (they match the extra rows emitted below).
    return 0 if $gbk >= 0xA2A1 && $gbk <= 0xA2AA;
    return 0 if $gbk >= 0xA6E0 && $gbk <= 0xA6F5;
    return 0
        if $gbk == 0xA8BB || $gbk == 0xA8BD
        || $gbk == 0xA8BE || $gbk == 0xA8C0;

    return 1;
}

# Reads a mapping file whose data lines are "<code> <unicode>" in hex,
# with "#" starting a comment.  Returns two hash references:
# code => unicode and unicode => code.
sub read_mapping {
    my ($map_file) = @_;

    my (%to_ucs, %from_ucs);

    open my $in, '<', $map_file
        or die "cannot read $map_file: $!\n";

    while (my $line = <$in>) {
        next if $line =~ /^#/;
        $line =~ s/#.*//;

        my ($gbk_hex, $ucs_hex) = split ' ', $line;

        # Blank lines and lines without a Unicode column carry no mapping.
        next unless defined $ucs_hex;

        my $gbk = hex $gbk_hex;
        my $ucs = hex $ucs_hex;

        next if is_skipped_code($gbk);
        next if $ucs < 0x80;

        $to_ucs{$gbk}   = $ucs;
        $from_ucs{$ucs} = $gbk;
    }

    close $in or die "cannot close $map_file: $!\n";

    return (\%to_ucs, \%from_ucs);
}

# Writes one table row per trail byte for the given lead byte.
# Codes absent from the mapping are written as 0x0000.
sub emit_rows {
    my ($out, $to_ucs, $lead, @trails) = @_;

    for my $trail (@trails) {
        my $gbk = ($lead << 8) + $trail;
        my $ucs = exists $to_ucs->{$gbk} ? $to_ucs->{$gbk} : 0;
        printf {$out} " 0x%.4X,\t/* 0x%.4X */\n", $ucs, $gbk;
    }

    return;
}

# Extra trail bytes emitted for particular lead bytes; the sizes match the
# "+ 0x0A + 0x16 + 0x06" terms of the array dimension written below.
my %extra_trails = (
    0xA2 => [ 0xA1 .. 0xAA ],    # 0x0A
    0xA6 => [ 0xE0 .. 0xF5 ],    # 0x16
    0xA8 => [ 0xBB .. 0xC0 ],    # 0x06
);

# Parse the embedded list of tables to generate.
my @names;
my (%map_file_for, %label_for);

while (my $line = <DATA>) {
    # Strip trailing whitespace, including the newline when there is one.
    $line =~ s/\s+$//;

    my ($name, $map_file, $label) = split ' ', $line, 3;
    next unless defined $label;    # need all three fields

    push @names, $name;
    $map_file_for{$name} = $map_file;
    $label_for{$name}    = $label;
}

for my $name (@names) {
    my $label    = $label_for{$name};
    my $map_file = $map_file_for{$name};

    print "$name\t$map_file\t$label\n";

    # Read the input first so a failure leaves no half-written output file.
    my ($to_ucs, $from_ucs) = read_mapping($map_file);

    my $out_file = "${name}_ucs.map";
    open my $out, '>', $out_file
        or die "cannot write $out_file: $!\n";

# print OUT <<EOF;
# /*
#   These conversion tables between $code and
#   Unicode were made from
#
#     ftp://ftp.unicode.org/Public/MAPPINGS/$map.
# */
    print {$out} <<EOF;
/* $label */

#define N_ucs_gbk_80_map 1

static wc_map ucs_gbk_80_map[ N_ucs_gbk_80_map ] = {
 { 0x20AC, 0x0080 },
};

static wc_uint16 ${name}_ucs_map[ 0x7E * 0xBE - 0x5E * 0x5E + 0x0A + 0x16 + 0x06 ] = {
EOF

    # Lead bytes 0x81..0xA0: every trail byte 0x40..0x7E and 0x80..0xFE.
    for my $lead (0x81 .. 0xA0) {
        emit_rows($out, $to_ucs, $lead, 0x40 .. 0x7E, 0x80 .. 0xFE);
    }

    # Lead bytes 0xA1..0xFE: trail bytes 0x40..0x7E and 0x80..0xA0, followed
    # by the extra trail bytes registered for that lead byte, if any.
    for my $lead (0xA1 .. 0xFE) {
        emit_rows($out, $to_ucs, $lead,
            0x40 .. 0x7E, 0x80 .. 0xA0,
            @{ $extra_trails{$lead} || [] });
    }

    my @ucs_codes = sort { $a <=> $b } keys %{$from_ucs};
    my $nucs      = scalar @ucs_codes;

    print {$out} <<EOF;
};

#define N_ucs_${name}_map $nucs

static wc_map ucs_${name}_map[ N_ucs_${name}_map ] = {
EOF

    for my $ucs (@ucs_codes) {
        printf {$out} " { 0x%.4X, 0x%.4X },\n", $ucs, $from_ucs->{$ucs};
    }

    print {$out} <<EOF;
};
EOF

    # Buffered write errors only surface at close, so check it.
    close $out or die "cannot close $out_file: $!\n";
}

__END__
gbk VENDORS/MICSFT/WINDOWS/CP936.TXT GBK/CP936 (Chinese)