diff options
Diffstat (limited to 'contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl')
-rwxr-xr-x | contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl | 405 |
1 files changed, 405 insertions, 0 deletions
diff --git a/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl new file mode 100755 index 00000000..31dd18bd --- /dev/null +++ b/contrib/idn/idnkit-1.0-src/util/generate_nameprep_data.pl @@ -0,0 +1,405 @@ +#! /usr/local/bin/perl -w +# $Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $ +# +# Copyright (c) 2001 Japan Network Information Center. All rights reserved. +# +# By using this file, you agree to the terms and conditions set forth bellow. +# +# LICENSE TERMS AND CONDITIONS +# +# The following License Terms and Conditions apply, unless a different +# license is obtained from Japan Network Information Center ("JPNIC"), +# a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, +# Chiyoda-ku, Tokyo 101-0047, Japan. +# +# 1. Use, Modification and Redistribution (including distribution of any +# modified or derived work) in source and/or binary forms is permitted +# under this License Terms and Conditions. +# +# 2. Redistribution of source code must retain the copyright notices as they +# appear in each source code file, this License Terms and Conditions. +# +# 3. Redistribution in binary form must reproduce the Copyright Notice, +# this License Terms and Conditions, in the documentation and/or other +# materials provided with the distribution. For the purposes of binary +# distribution the "Copyright Notice" refers to the following language: +# "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." +# +# 4. The name of JPNIC may not be used to endorse or promote products +# derived from this Software without specific prior written approval of +# JPNIC. +# +# 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +# ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +# + +use v5.6.0; # for pack('U') +use bytes; + +use lib qw(.); + +use SparseMap; +use Getopt::Long; + +(my $myid = '$Id: generate_nameprep_data.pl,v 1.1.1.1 2003/06/04 00:27:54 marka Exp $') =~ s/\$([^\$]+)\$/\$-$1-\$/; + +my @map_bits = (9, 7, 5); +my @proh_bits = (7, 7, 7); +my @unas_bits = (7, 7, 7); +my @bidi_bits = (9, 7, 5); + +my @bidi_types = ('OTHERS', 'R_AL', 'L'); + +my $dir = '.'; +my @versions = (); + +GetOptions('dir=s', \$dir) or die usage(); +@versions = @ARGV; + +print_header(); + +bits_definition("MAP", @map_bits); +bits_definition("PROH", @proh_bits); +bits_definition("UNAS", @unas_bits); +bits_definition("BIDI", @bidi_bits); + +generate_data($_) foreach @ARGV; + +sub usage { + die "Usage: $0 [-dir dir] version..\n"; +} + +sub generate_data { + my $version = shift; + generate_mapdata($version, "$dir/nameprep.$version.map"); + generate_prohibiteddata($version, "$dir/nameprep.$version.prohibited"); + generate_unassigneddata($version, "$dir/nameprep.$version.unassigned"); + generate_bididata($version, "$dir/nameprep.$version.bidi"); +} + +# +# Generate mapping data. +# +sub generate_mapdata { + my $version = shift; + my $file = shift; + + my $map = SparseMap::Int->new(BITS => [@map_bits], + MAX => 0x110000, + MAPALL => 1, + DEFAULT => 0); + open FILE, $file or die "cannot open $file: $!\n"; + + my $mapbuf = "\0"; # dummy + my %maphash = (); + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_map_ref($version, $same_as); + close FILE; + return; + } + next; + } + next if /^\#/; + next if /^\s*$/; + register_map($map, \$mapbuf, \%maphash, $_); + } + close FILE; + generate_map($version, $map, \$mapbuf); +} + +# +# Generate prohibited character data. +# +sub generate_prohibiteddata { + my $version = shift; + my $file = shift; + + my $proh = SparseMap::Bit->new(BITS => [@proh_bits], + MAX => 0x110000); + open FILE, $file or die "cannot open $file: $!\n"; + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_prohibited_ref($version, $same_as); + close FILE; + return; + } + next; + } + next if /^\#/; + next if /^\s*$/; + register_prohibited($proh, $_); + } + close FILE; + generate_prohibited($version, $proh); +} + +# +# Generate unassigned codepoint data. +# +sub generate_unassigneddata { + my $version = shift; + my $file = shift; + + my $unas = SparseMap::Bit->new(BITS => [@unas_bits], + MAX => 0x110000); + open FILE, $file or die "cannot open $file: $!\n"; + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_unassigned_ref($version, $same_as); + close FILE; + return; + } + next; + } + next if /^\#/; + next if /^\s*$/; + register_unassigned($unas, $_); + } + close FILE; + generate_unassigned($version, $unas); +} + +# +# Generate data of bidi "R" or "AL" characters. +# +sub generate_bididata { + my $version = shift; + my $file = shift; + + my $bidi = SparseMap::Int->new(BITS => [@bidi_bits], + MAX => 0x110000); + open FILE, $file or die "cannot open $file: $!\n"; + + my $type = 0; + while (<FILE>) { + if ($. == 1 and /^%\s*SAME-AS\s+(\S+)/) { + my $same_as = $1; + if (grep {$_ eq $same_as} @versions > 0) { + generate_unassigned_ref($version, $same_as); + close FILE; + return; + } + next; + } + if (/^%\s*BIDI_TYPE\s+(\S+)$/) { + my $i = 0; + for ($i = 0; $i < @bidi_types; $i++) { + if ($1 eq $bidi_types[$i]) { + $type = $i; + last; + } + } + die "unrecognized line: $_" if ($i >= @bidi_types); + next; + } + next if /^\#/; + next if /^\s*$/; + register_bidi($bidi, $type, $_); + } + close FILE; + + generate_bidi($version, $bidi); +} + +sub print_header { + print <<"END"; +/* \$Id\$ */ +/* $myid */ +/* + * Do not edit this file! + * This file is generated from NAMEPREP specification. + */ + +END +} + +sub bits_definition { + my $name = shift; + my @bits = @_; + my $i = 0; + + foreach my $n (@bits) { + print "#define ${name}_BITS_$i\t$n\n"; + $i++; + } + print "\n"; +} + +sub register_map { + my ($map, $bufref, $hashref, $line) = @_; + + my ($from, $to) = split /;/, $line; + my @fcode = map {hex($_)} split ' ', $from; + my @tcode = map {hex($_)} split ' ', $to; + + my $ucs4 = pack('V*', @tcode); + $ucs4 =~ s/\000+$//; + + my $offset; + if (exists $hashref->{$ucs4}) { + $offset = $hashref->{$ucs4}; + } else { + $offset = length $$bufref; + $$bufref .= pack('C', length($ucs4)) . $ucs4; + $hashref->{$ucs4} = $offset; + } + + die "unrecognized line: $line" if @fcode != 1; + $map->add($fcode[0], $offset); +} + +sub generate_map { + my ($version, $map, $bufref) = @_; + + $map->fix(); + + print $map->cprog(NAME => "nameprep_${version}_map"); + print "\nstatic const unsigned char nameprep_${version}_map_data[] = \{\n"; + print_uchararray($$bufref); + print "};\n\n"; +} + +sub generate_map_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_map_imap nameprep_${refversion}_map_imap +#define nameprep_${version}_map_table nameprep_${refversion}_map_table +#define nameprep_${version}_map_data nameprep_${refversion}_map_data + +END +} + +sub print_uchararray { + my @chars = unpack 'C*', $_[0]; + my $i = 0; + foreach my $v (@chars) { + if ($i % 12 == 0) { + print "\n" if $i != 0; + print "\t"; + } + printf "%3d, ", $v; + $i++; + } + print "\n"; +} + +sub register_prohibited { + my $proh = shift; + register_bitmap($proh, @_); +} + +sub register_unassigned { + my $unas = shift; + register_bitmap($unas, @_); +} + +sub register_bidi { + my $bidi = shift; + my $type = shift; + register_intmap($bidi, $type, @_); +} + +sub generate_prohibited { + my ($version, $proh) = @_; + generate_bitmap($proh, "nameprep_${version}_prohibited"); + print "\n"; +} + +sub generate_prohibited_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_prohibited_imap nameprep_${refversion}_prohibited_imap +#define nameprep_${version}_prohibited_bitmap nameprep_${refversion}_prohibited_bitmap + +END +} + +sub generate_unassigned { + my ($version, $unas) = @_; + generate_bitmap($unas, "nameprep_${version}_unassigned"); + print "\n"; +} + +sub generate_unassigned_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_unassigned_imap nameprep_${refversion}_unassigned_imap +#define nameprep_${version}_unassigned_bitmap nameprep_${refversion}_unassigned_bitmap + +END +} + +sub generate_bidi { + my ($version, $bidi) = @_; + + $bidi->fix(); + + print $bidi->cprog(NAME => "nameprep_${version}_bidi"); + print "\n"; + print "static const unsigned char nameprep_${version}_bidi_data[] = \{\n"; + + foreach my $type (@bidi_types) { + printf "\tidn_biditype_%s, \n", lc($type); + } + print "};\n\n"; +} + +sub generate_bidi_ref { + my ($version, $refversion) = @_; + print <<"END"; +#define nameprep_${version}_bidi_imap nameprep_${refversion}_bidi_imap +#define nameprep_${version}_bidi_table nameprep_${refversion}_bidi_table + +END +} + +sub register_bitmap { + my $map = shift; + my $line = shift; + + /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line"; + my $start = hex($1); + my $end = defined($2) ? hex($2) : undef; + if (defined $end) { + $map->add($start .. $end); + } else { + $map->add($start); + } +} + +sub register_intmap { + my $map = shift; + my $value = shift; + my $line = shift; + + /^([0-9A-Fa-f]+)(?:-([0-9A-Fa-f]+))?/ or die "unrecognized line: $line"; + my $start = hex($1); + my $end = defined($2) ? hex($2) : $start; + for (my $i = $start; $i <= $end; $i++) { + $map->add($i, $value); + } +} + +sub generate_bitmap { + my $map = shift; + my $name = shift; + $map->fix(); + #$map->stat(); + print $map->cprog(NAME => $name); +} |