mk_sjis_ucs_map.pl (1663B)
use strict;
use warnings;

# Generate "<name>_ucs.map" C tables from code-point/UCS mapping files.
#
# Each line after __END__ holds three whitespace-separated fields:
#
#     <name>  <path of mapping file>  <description of the character set>
#
# For every such entry the script prints "<name>\t<map>\t<code>" to STDOUT,
# reads the mapping file, and writes "<name>_ucs.map" into the current
# directory.  The generated file contains two C arrays:
#
#     <name>_ucs_map : UCS value for each code with lead byte 0x87, 0xED,
#                      0xEE, 0xFA, 0xFB or 0xFC (0 where nothing is mapped);
#     ucs_<name>_map : { UCS, code } pairs sorted by UCS value.
#
# The script dies if a mapping file cannot be read or an output file cannot
# be written, instead of silently emitting an empty table.

# Read the list of tables to build, remembering the order of appearance.
my @names;
my (%map_of, %code_of);
while (my $entry = <DATA>) {
    # chomp, not chop: the final DATA line may lack a trailing newline and
    # must not lose its last character.
    chomp $entry;
    $entry =~ s/\s*$//;

    # The third field (description) may itself contain spaces.
    my @fields = split(" ", $entry, 3);
    next if @fields < 3;

    my ($n, $m, $c) = @fields;
    push @names, $n;
    $map_of{$n}  = $m;
    $code_of{$n} = $c;
}

for my $name (@names) {
    my $code = $code_of{$name};
    my $map  = $map_of{$name};

    print "$name\t$map\t$code\n";

    my %to_ucs;      # code  -> UCS
    my %from_ucs;    # UCS   -> code

    open(my $map_fh, '<', $map)
        or die "$0: cannot open $map for reading: $!\n";
    while (my $line = <$map_fh>) {
        # Skip comment lines and strip trailing comments.
        next if $line =~ /^#/;
        $line =~ s/#.*//;

        my @fields = split(" ", $line);
        next unless @fields;

        my $i = hex($fields[0]);
        # A line that gives a code but no UCS value is recorded as 0.
        my $u = @fields > 1 ? hex($fields[1]) : 0;

        # Only codes in these three ranges are put into the tables.
        next unless ($i >= 0x8740 && $i <= 0x87FC)
                 || ($i >= 0xED40 && $i <= 0xEEFC)
                 || ($i >= 0xFA40 && $i <= 0xFCFC);

        $to_ucs{$i} = $u;

        # Reverse mapping: the first code seen for a UCS value is kept,
        # except that a code in 0xED40..0xEEFC is always replaced by a
        # later line mapping the same UCS value.
        # NOTE(review): presumably this prefers the 0xFA40.. codes over
        # their 0xED40.. duplicates -- confirm against the mapping file.
        if ($u > 0) {
            my $prev = $from_ucs{$u};
            if (!$prev || ($prev >= 0xED40 && $prev <= 0xEEFC)) {
                $from_ucs{$u} = $i;
            }
        }
    }
    close($map_fh)
        or die "$0: error reading $map: $!\n";

    my $out_name = "${name}_ucs.map";
    open(my $out, '>', $out_name)
        or die "$0: cannot open $out_name for writing: $!\n";

#   print OUT <<EOF;
# /*
#   These conversion tables between $code and
#   Unicode were made from
#
#     ftp://ftp.unicode.org/Public/MAPPINGS/$map.
# */
    print {$out} <<EOF;
/* $code */

static wc_uint16 ${name}_ucs_map[ 0x5E * 10 ] = {
EOF

    # 0x5E * 10 = 940 entries: lead bytes 0x87 and 0xFC contribute only the
    # trail bytes 0x40..0x7E, 0x80..0x9E (0x5E entries each); the other four
    # lead bytes also contribute 0x9F..0xFC (2 * 0x5E entries each).
    for my $ub (0x87, 0xed, 0xee, 0xfa, 0xfb, 0xfc) {
        my @trail = (0x40 .. 0x7E, 0x80 .. 0x9E);
        push @trail, (0x9F .. 0xFC) unless $ub == 0x87 || $ub == 0xfc;

        for my $lb (@trail) {
            my $sjis = ($ub << 8) + $lb;
            # Codes absent from the mapping file are emitted as 0x0000.
            printf {$out} " 0x%.4X,\t/* 0x%.4X */\n", $to_ucs{$sjis} || 0, $sjis;
        }
    }

    # Numeric sort; a default (string) sort would misorder the UCS keys.
    my @ucs  = sort { $a <=> $b } keys %from_ucs;
    my $nucs = scalar @ucs;

    print {$out} <<EOF;
};

#define N_ucs_${name}_map $nucs

static wc_map ucs_${name}_map[ N_ucs_${name}_map ] = {
EOF
    for my $u (@ucs) {
        printf {$out} " { 0x%.4X, 0x%.4X },\n", $u, $from_ucs{$u};
    }

    print {$out} <<EOF;
};
EOF

    # Buffered write errors only surface at close, so check it.
    close($out)
        or die "$0: error writing $out_name: $!\n";
}

__END__
sjis_ext VENDORS/MICSFT/WINDOWS/CP932.TXT Shift_JIS/CP932 (Japanese)