mk_cp1258_ucs_map.pl (1823B)
# Generate "<name>_ucs.map", a C source fragment holding the conversion
# tables between a code page and Unicode.
#
# The table definitions live in the DATA section at the end of this file,
# one per line, as three whitespace-separated fields:
#
#     <name> <mapping file> <description>
#
# Every listed mapping file is read and merged into one set of tables; the
# output file and the C identifiers are named after the FIRST entry.

use strict;
use warnings;

# Convert one hexadecimal field of a mapping file to a number.
# A missing field counts as 0, and a field that is not a hex number (this
# script does not validate the second field at all) is parsed as far as it
# goes, exactly like a bare hex() call -- only the warning is suppressed.
sub field_to_number {
    my ($field) = @_;
    return 0 unless defined $field;
    no warnings 'digit';
    return hex $field;
}

my @names;      # table names, in DATA order
my %map_of;     # table name -> path of its mapping file
my %code_of;    # table name -> human-readable description

while (my $entry = <DATA>) {
    # Strips the newline and any trailing blanks.  (A plain chop would eat a
    # real character if the last line had no newline.)
    $entry =~ s/\s+$//;
    my @fields = split ' ', $entry, 3;
    next if @fields < 3;
    my ($name, $map, $code) = @fields;
    push @names, $name;
    $map_of{$name}  = $map;
    $code_of{$name} = $code;
}
die "no table definitions found in the DATA section\n" unless @names;

my @to_ucs;      # source code -> Unicode, filled from the "V..." mapping file
my %to_ucs2;     # source code (> 0x100) -> Unicode, from the "c..." mapping file
my %from_ucs;    # Unicode -> source code, merged from both files

foreach my $name (@names) {
    my $code = $code_of{$name};
    my $map  = $map_of{$name};

    print "$name\t$map\t$code\n";

    # Fail loudly: a missing mapping file would otherwise produce silently
    # truncated tables.
    open my $map_fh, '<', $map
        or die "cannot open mapping file '$map': $!\n";
    while (my $line = <$map_fh>) {
        # Only lines starting with "0" are data lines.
        next unless $line =~ /^0/;
        my ($code_field, $ucs_field) = split ' ', $line;
        my $src = field_to_number($code_field);
        my $ucs = field_to_number($ucs_field);

        # The mapping file is recognised by the first letter of its path:
        # "V" (VENDORS/...) feeds the array table with non-ASCII targets.
        if ($map =~ /^V/ && $ucs > 0x7f) {
            $to_ucs[$src]   = $ucs;
            $from_ucs{$ucs} = $src;
        }
        # "c" (cp1258_uni.txt) feeds the sparse table with codes above 0x100;
        # a reverse mapping that is already set is never overridden.
        if ($map =~ /^c/ && $src > 0x100) {
            $to_ucs2{$src} = $ucs;
            $from_ucs{$ucs} = $src unless defined $from_ucs{$ucs};
        }
    }
    close $map_fh;
}

my $name = $names[0];
my $code = $code_of{$name};
my $map  = $map_of{$name};    # only referenced by the disabled header below
my $out_file = "${name}_ucs.map";
open my $out, '>', $out_file
    or die "cannot create '$out_file': $!\n";

# print OUT <<EOF;
# /*
# These conversion tables between $code and
# Unicode were made from
#
# ftp://ftp.unicode.org/Public/MAPPINGS/$map.
# */
print {$out} <<EOF;
/* $code */

static wc_uint16 ${name}_ucs_map[ 0x80 ] = {
EOF

# 16 rows of 8 entries cover the codes 0x80 .. 0xFF.
foreach my $row (0x10 .. 0x1F) {
    print {$out} " ";
    foreach my $col (0 .. 7) {
        my $ucs = $to_ucs[$row * 8 + $col];
        if ($ucs) {
            printf {$out} " 0x%.4X,", $ucs;
        }
        else {
            print {$out} " 0, ";
        }
    }
    print {$out} "\n";
}

my @wide_codes   = sort { $a <=> $b } keys %to_ucs2;
my $n_wide_codes = scalar @wide_codes;

print {$out} <<EOF;
};

#define N_${name}2_ucs_map $n_wide_codes

static wc_map ${name}2_ucs_map[ N_${name}2_ucs_map ] = {
EOF
foreach my $src (@wide_codes) {
    printf {$out} " { 0x%.4X, 0x%.4X },\n", $src, $to_ucs2{$src};
}

my @ucs_codes   = sort { $a <=> $b } keys %from_ucs;
my $n_ucs_codes = scalar @ucs_codes;

print {$out} <<EOF;
};

#define N_ucs_${name}_map $n_ucs_codes

static wc_map ucs_${name}_map[ N_ucs_${name}_map ] = {
EOF
foreach my $ucs (@ucs_codes) {
    printf {$out} " { 0x%.4X, 0x%.2X },\n", $ucs, $from_ucs{$ucs};
}

print {$out} <<EOF;
};
EOF

# Buffered write errors only surface when the handle is closed.
close $out
    or die "error while writing '$out_file': $!\n";

__END__
cp1258 VENDORS/MICSFT/WINDOWS/CP1258.TXT CP1258 (Vietnamese)
cp1258_2 cp1258_uni.txt CP1258 (Vietnamese)