mk_johab_ucs_map.pl (1367B)
# Generate C conversion tables between a legacy character set and Unicode.
#
# Every line after __END__ describes one table as three whitespace-separated
# fields:
#
#     <name>  <mapping file path>  <description>
#
# For each such line the mapping file is read and "<name>_ucs.map" is written
# to the current directory.  The output holds two wc_map arrays, each sorted
# numerically by its first column:
#
#     <name>2_ucs_map   { charset code, Unicode code point }
#     ucs_<name>2_map   { Unicode code point, charset code }
#
# The script dies with a diagnostic if a mapping file cannot be read or an
# output file cannot be written, instead of silently emitting empty tables.

use strict;
use warnings;

my @names;       # table names, in the order they appear in DATA
my %map_for;     # name => path of the mapping file
my %code_for;    # name => human-readable description of the charset

while (my $line = <DATA>) {
    # Strip the line terminator together with any trailing blanks.  A plain
    # chop would also eat a real character from a final line that has no
    # trailing newline.
    $line =~ s/\s+\z//;

    # Lines with fewer than three fields are ignored.
    my @fields = split(" ", $line, 3);
    next if @fields < 3;

    my ($n, $m, $c) = @fields;
    push @names, $n;
    $map_for{$n}  = $m;
    $code_for{$n} = $c;
}

for my $name (@names) {
    my $code = $code_for{$name};
    my $map  = $map_for{$name};

    # Progress line on STDOUT.
    print "$name\t$map\t$code\n";

    my %to_ucs;      # charset code => Unicode code point
    my %from_ucs;    # Unicode code point => charset code

    # Read the whole mapping before touching the output file, so that an
    # unreadable input never truncates a previously generated table.
    open(my $in, '<', $map)
        or die "$0: cannot open mapping file '$map': $!\n";

    while (my $entry = <$in>) {
        next if $entry =~ /^#/;    # whole-line comment
        $entry =~ s/#.*//;         # trailing comment

        # The first two fields are hexadecimal: charset code, then Unicode.
        # A line with fewer than two fields carries no usable mapping.
        my ($i, $u) = split(" ", $entry);
        next unless defined $u;

        $i = hex($i);
        $u = hex($u);

        # U+AC00..U+D7A3 is the Unicode Hangul Syllables block.
        # NOTE(review): presumably these are converted algorithmically
        # elsewhere and therefore kept out of the tables -- confirm.
        next if $u >= 0xAC00 && $u <= 0xD7A3;

        # NOTE(review): charset codes from 0xD800 upward are dropped; the
        # reason is not visible in this script -- confirm with the consumer.
        next if $i >= 0xD800;

        # Code points below U+0080 are never tabulated.
        next if $u < 0x80;

        $from_ucs{$u} = $i;

        # Only charset codes >= 0x80 get an entry in the charset-to-Unicode
        # direction.
        if ($i >= 0x80) {
            $to_ucs{$i} = $u;
        }
    }

    close($in)
        or die "$0: error reading mapping file '$map': $!\n";

    my $out_file = "${name}_ucs.map";
    open(my $out, '>', $out_file)
        or die "$0: cannot create '$out_file': $!\n";

    # Disabled file header, kept from the original script:
    # print OUT <<EOF;
    # /*
    # These conversion tables between $code and
    # Unicode were made from
    #
    # ftp://ftp.unicode.org/Public/MAPPINGS/$map.
    # */

    my @ucs  = sort { $a <=> $b } keys %to_ucs;
    my $nucs = scalar @ucs;

    print {$out} <<EOF;
/* $code */

#define N_${name}2_ucs_map $nucs

static wc_map ${name}2_ucs_map[ N_${name}2_ucs_map ] = {
EOF
    for my $key (@ucs) {
        printf {$out} " { 0x%.4X, 0x%.4X },\n", $key, $to_ucs{$key};
    }

    @ucs  = sort { $a <=> $b } keys %from_ucs;
    $nucs = scalar @ucs;

    print {$out} <<EOF;
};

#define N_ucs_${name}2_map $nucs

static wc_map ucs_${name}2_map[ N_ucs_${name}2_map ] = {
EOF
    for my $key (@ucs) {
        printf {$out} " { 0x%.4X, 0x%.4X },\n", $key, $from_ucs{$key};
    }

    print {$out} <<EOF;
};
EOF

    # Buffered write errors (e.g. a full disk) only surface at close time.
    close($out)
        or die "$0: error writing '$out_file': $!\n";
}

__END__
johab EASTASIA/KSC/JOHAB.TXT Johab (Korean)