mk_gb_ucs_map.pl (1591B)
# Generate GB <-> Unicode conversion tables as C source.
#
# Each line of the DATA section at the end of this file describes one
# character set:
#
#     <name> <mapping file> <description>
#
# For every such line the mapping file is read and "<name>_ucs.map" is
# written.  The output contains two C tables:
#
#   <name>_ucs_map[]  Unicode value for every cell 0x2121 .. 0x7E7E
#                     (0 where the mapping file has no entry).
#   ucs_<name>_map[]  { Unicode, GB code } pairs sorted by Unicode value,
#                     with N_ucs_<name>_map defined as the pair count.

use strict;
use warnings;

my @names;          # character set names, in DATA order
my %map_file_of;    # name => path of the mapping file
my %code_of;        # name => human readable description

while (my $line = <DATA>) {
    # Strip the line terminator and trailing blanks in one step; unlike
    # chop() this cannot eat a real character when the final line has no
    # trailing newline.
    $line =~ s/\s+$//;

    my ($name, $map_file, $code) = split ' ', $line, 3;
    next unless defined $code;    # need all three fields

    push @names, $name;
    $map_file_of{$name} = $map_file;
    $code_of{$name}     = $code;
}

for my $name (@names) {
    my $code     = $code_of{$name};
    my $map_file = $map_file_of{$name};

    # Progress report on STDOUT.
    print "$name\t$map_file\t$code\n";

    my @to_ucs;      # GB code  => Unicode value
    my %from_ucs;    # Unicode value => GB code

    # Read the whole mapping before touching the output file, so that a
    # missing or unreadable input never leaves a truncated table behind.
    open(my $in, '<', $map_file)
        or die "cannot open $map_file for reading: $!\n";
    while (my $entry = <$in>) {
        $entry =~ s/#.*//;    # drop comment lines and trailing comments
        my ($gb_hex, $ucs_hex) = split ' ', $entry;
        next unless defined $ucs_hex;    # blank or incomplete line

        my $gb  = hex($gb_hex);
        my $ucs = hex($ucs_hex);
        $to_ucs[$gb] = $ucs;
        $from_ucs{$ucs} = $gb if $ucs > 0;
    }
    close($in);

    # compatibility with GBK(CP936), GB18030:
    # 0x2124 <-> U+00B7 and 0x212A <-> U+2014 replace whatever the mapping
    # file assigned to those two cells, in both directions.
    for my $gb (0x2124, 0x212A) {
        delete $from_ucs{ $to_ucs[$gb] } if defined $to_ucs[$gb];
    }
    $from_ucs{0x00B7} = 0x2124;
    $from_ucs{0x2014} = 0x212A;
    $to_ucs[0x2124]   = 0x00B7;
    $to_ucs[0x212A]   = 0x2014;

    my $out_file = "${name}_ucs.map";
    open(my $out, '>', $out_file)
        or die "cannot open $out_file for writing: $!\n";

    # The original script also carried a disabled, longer header that
    # pointed at ftp://ftp.unicode.org/Public/MAPPINGS/$map as the source
    # of the tables; only the short header below is emitted.
    print {$out} <<EOF;
/* $code */

static wc_uint16 ${name}_ucs_map[ 0x5E * 0x5E ] = {
EOF

    # One entry per cell, row-major over 0x21..0x7E in both bytes
    # (0x5E * 0x5E entries in total).
    for my $row (0x21 .. 0x7E) {
        for my $col (0x21 .. 0x7E) {
            my $gb  = $row * 0x100 + $col;
            my $ucs = $to_ucs[$gb];
            if ($ucs) {
                printf {$out} " 0x%.4X,", $ucs;
            }
            else {
                print {$out} " 0,\t";
            }
            printf {$out} "\t/* 0x%.4X */\n", $gb;
        }
    }

    my @ucs  = sort { $a <=> $b } keys %from_ucs;
    my $nucs = scalar @ucs;

    print {$out} <<EOF;
};

#define N_ucs_${name}_map $nucs

static wc_map ucs_${name}_map[ N_ucs_${name}_map ] = {
EOF

    for my $ucs (@ucs) {
        printf {$out} " { 0x%.4X, 0x%.4X },\n", $ucs, $from_ucs{$ucs};
    }

    print {$out} <<EOF;
};
EOF

    # Buffered write errors (e.g. a full disk) only surface at close.
    close($out) or die "error while writing $out_file: $!\n";
}

__END__
gb2312	EASTASIA/GB/GB2312.TXT	GB 2312 (Chinese)
gb12345	EASTASIA/GB/GB12345.TXT	GB 12345 (Chinese)