mk_uhc_ucs_map.pl (1768B)
# Generate C conversion tables between a double-byte charset and Unicode.
#
# For every charset listed after __END__ (fields: name, mapping file,
# description) this script reads the mapping file and writes
# "<name>_ucs.map" containing:
#   - <name>_ucs_map[]  : charset code -> UCS, in a fixed code order
#   - ucs_<name>_map[]  : (UCS, charset code) pairs sorted by UCS value
#
# The script dies if a mapping file cannot be read or an output file cannot
# be written, so a missing input never produces a silently empty table.

use strict;
use warnings;

# Charset names in DATA order, plus per-name mapping file and description.
my @NAME;
my (%MAP, %CODE);

while (my $line = <DATA>) {
    # chomp, not chop: chop would eat a real character from a final line
    # that has no trailing newline.
    chomp $line;
    $line =~ s/\s*$//;

    # Entries need all three fields; the third keeps its embedded spaces.
    my ($n, $m, $c) = split(" ", $line, 3);
    next unless defined $c;

    push(@NAME, $n);
    $MAP{$n}  = $m;
    $CODE{$n} = $c;
}

foreach my $name (@NAME) {
    my $code = $CODE{$name};
    my $map  = $MAP{$name};

    print "$name\t$map\t$code\n";

    my %to_ucs;      # charset code -> UCS
    my %from_ucs;    # UCS -> charset code

    # Read the mapping before creating the output, so a missing input does
    # not leave a truncated output file behind.
    open(my $map_fh, '<', $map)
        or die "$0: cannot open $map for reading: $!\n";

    while (my $entry = <$map_fh>) {
        next if $entry =~ /^#/;
        $entry =~ s/#.*//;

        # Lines without both a code and a UCS column carry no mapping.
        my ($code_hex, $ucs_hex) = split(" ", $entry);
        next unless defined $ucs_hex;

        my $mb    = hex($code_hex);
        my $ucs   = hex($ucs_hex);
        my $lead  = $mb >> 8;
        my $trail = $mb & 0x00FF;

        # Codes with both bytes in 0xA1..0xFE are left out of these tables,
        # except 0xA2E6 and 0xA2E7.
        # NOTE(review): presumably that range is handled by a separate
        # table elsewhere -- confirm.
        if (   $lead >= 0xA1 && $lead <= 0xFE
            && $trail >= 0xA1 && $trail <= 0xFE) {
            next unless $mb >= 0xA2E6 && $mb <= 0xA2E7;
        }

        # UCS values below 0x80 are not tabulated.
        next if $ucs < 0x80;

        $to_ucs{$mb}    = $ucs;
        $from_ucs{$ucs} = $mb;
    }
    close($map_fh);

    my $out_file = "${name}_ucs.map";
    open(my $out, '>', $out_file)
        or die "$0: cannot open $out_file for writing: $!\n";

#    print OUT <<EOF;
# /*
#   These conversion tables between $code and
#   Unicode were made from
#
#     ftp://ftp.unicode.org/Public/MAPPINGS/$map.
# */

    # Array size: 0x20 lead bytes (0x81..0xA0) x 0xB2 trail bytes, plus
    # 0x27 lead bytes (0xA1..0xC7) x 0x54 trail bytes, plus the two extra
    # codes 0xA2E6 and 0xA2E7.
    print {$out} <<EOF;
/* $code */

static wc_uint16 ${name}_ucs_map[ 0x20 * 0xB2 + 0x27 * 0x54 + 2 ] = {
EOF

    # Emit one table row; codes with no mapping are written as 0x0000.
    my $emit = sub {
        my ($mb) = @_;
        printf {$out} " 0x%.4X,\t/* 0x%.4X */\n", $to_ucs{$mb} || 0, $mb;
    };

    for my $ub (0x81 .. 0xA0) {
        for my $lb (0x41 .. 0x5A, 0x61 .. 0x7A, 0x81 .. 0xFE) {
            $emit->(($ub << 8) + $lb);
        }
    }
    for my $ub (0xA1 .. 0xC7) {
        for my $lb (0x41 .. 0x5A, 0x61 .. 0x7A, 0x81 .. 0xA0) {
            $emit->(($ub << 8) + $lb);
        }
        if ($ub == 0xA2) {
            for my $lb (0xE6 .. 0xE7) {
                $emit->(($ub << 8) + $lb);
            }
        }
    }

    # Reverse table, sorted numerically by UCS value.
    my @ucs  = sort { $a <=> $b } keys %from_ucs;
    my $nucs = scalar @ucs;

    print {$out} <<EOF;
};

#define N_ucs_${name}_map $nucs

static wc_map ucs_${name}_map[ N_ucs_${name}_map ] = {
EOF
    for my $ucs (@ucs) {
        printf {$out} " { 0x%.4X, 0x%.4X },\n", $ucs, $from_ucs{$ucs};
    }

    print {$out} <<EOF;
};
EOF

    # Buffered write errors only surface at close, so check it.
    close($out)
        or die "$0: error writing $out_file: $!\n";
}

__END__
uhc VENDORS/MICSFT/WINDOWS/CP949.TXT UHC/CP949 (Korean)