[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 package utf8; 2 use strict; 3 use warnings; 4 5 sub DEBUG () { 0 } 6 7 sub DESTROY {} 8 9 my %Cache; 10 11 our (%PropertyAlias, %PA_reverse, %PropValueAlias, %PVA_reverse, %PVA_abbr_map); 12 13 sub croak { require Carp; Carp::croak(@_) } 14 15 ## 16 ## "SWASH" == "SWATCH HASH". A "swatch" is a swatch of the Unicode landscape. 17 ## It's a data structure that encodes a set of Unicode characters. 18 ## 19 20 sub SWASHNEW { 21 my ($class, $type, $list, $minbits, $none) = @_; 22 local $^D = 0 if $^D; 23 24 print STDERR "SWASHNEW @_\n" if DEBUG; 25 26 ## 27 ## Get the list of codepoints for the type. 28 ## Called from swash_init (see utf8.c) or SWASHNEW itself. 29 ## 30 ## Callers of swash_init: 31 ## op.c:pmtrans -- for tr/// and y/// 32 ## regexec.c:regclass_swash -- for /[]/, \p, and \P 33 ## utf8.c:is_utf8_common -- for common Unicode properties 34 ## utf8.c:to_utf8_case -- for lc, uc, ucfirst, etc. and //i 35 ## 36 ## Given a $type, our goal is to fill $list with the set of codepoint 37 ## ranges. If $type is false, $list passed is used. 38 ## 39 ## $minbits: 40 ## For binary properties, $minbits must be 1. 41 ## For character mappings (case and transliteration), $minbits must 42 ## be a number except 1. 43 ## 44 ## $list (or that filled according to $type): 45 ## Refer to perlunicode.pod, "User-Defined Character Properties." 46 ## 47 ## For binary properties, only characters with the property value 48 ## of True should be listed. The 3rd column, if any, will be ignored. 49 ## 50 ## To make the parsing of $type clear, this code takes the a rather 51 ## unorthodox approach of last'ing out of the block once we have the 52 ## info we need. Were this to be a subroutine, the 'last' would just 53 ## be a 'return'. 54 ## 55 my $file; ## file to load data from, and also part of the %Cache key. 56 my $ListSorted = 0; 57 58 if ($type) 59 { 60 $type =~ s/^\s+//; 61 $type =~ s/\s+$//; 62 63 print STDERR "type = $type\n" if DEBUG; 64 65 GETFILE: 66 { 67 ## 68 ## It could be a user-defined property. 69 ## 70 71 my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1); 72 73 if (defined $caller1 && $type =~ /^(?:\w+)$/) { 74 my $prop = "$caller1}::$type"; 75 if (exists &{$prop}) { 76 no strict 'refs'; 77 78 $list = &{$prop}; 79 last GETFILE; 80 } 81 } 82 83 my $wasIs; 84 85 ($wasIs = $type =~ s/^Is(?:\s+|[-_])?//i) 86 or 87 $type =~ s/^(?:(?:General(?:\s+|_)?)?Category|gc)\s*[:=]\s*//i 88 or 89 $type =~ s/^(?:Script|sc)\s*[:=]\s*//i 90 or 91 $type =~ s/^Block\s*[:=]\s*/In/i; 92 93 94 ## 95 ## See if it's in some enumeration. 96 ## 97 require "unicore/PVA.pl"; 98 if ($type =~ /^([\w\s]+)[:=]\s*(.*)/) { 99 my ($enum, $val) = (lc $1, lc $2); 100 $enum =~ tr/ _-//d; 101 $val =~ tr/ _-//d; 102 103 my $pa = $PropertyAlias{$enum} ? $enum : $PA_reverse{$enum}; 104 my $f = $PropValueAlias{$pa}{$val} ? $val : $PVA_reverse{$pa}{lc $val}; 105 106 if ($pa and $f) { 107 $pa = "gc_sc" if $pa eq "gc" or $pa eq "sc"; 108 $file = "unicore/lib/$pa/$PVA_abbr_map{$pa}{lc $f}.pl"; 109 last GETFILE; 110 } 111 } 112 else { 113 my $t = lc $type; 114 $t =~ tr/ _-//d; 115 116 if ($PropValueAlias{gc}{$t} or $PropValueAlias{sc}{$t}) { 117 $file = "unicore/lib/gc_sc/$PVA_abbr_map{gc_sc}{$t}.pl"; 118 last GETFILE; 119 } 120 } 121 122 ## 123 ## See if it's in the direct mapping table. 124 ## 125 require "unicore/Exact.pl"; 126 if (my $base = $utf8::Exact{$type}) { 127 $file = "unicore/lib/gc_sc/$base.pl"; 128 last GETFILE; 129 } 130 131 ## 132 ## If not there exactly, try the canonical form. The canonical 133 ## form is lowercased, with any separators (\s+|[-_]) removed. 134 ## 135 my $canonical = lc $type; 136 $canonical =~ s/(?<=[a-z\d])(?:\s+|[-_])(?=[a-z\d])//g; 137 print STDERR "canonical = $canonical\n" if DEBUG; 138 139 require "unicore/Canonical.pl"; 140 if (my $base = ($utf8::Canonical{$canonical} || $utf8::Canonical{ lc $utf8::PropertyAlias{$canonical} })) { 141 $file = "unicore/lib/gc_sc/$base.pl"; 142 last GETFILE; 143 } 144 145 ## 146 ## See if it's a user-level "To". 147 ## 148 149 my $caller0 = caller(0); 150 151 if (defined $caller0 && $type =~ /^To(?:\w+)$/) { 152 my $map = $caller0 . "::" . $type; 153 154 if (exists &{$map}) { 155 no strict 'refs'; 156 157 $list = &{$map}; 158 last GETFILE; 159 } 160 } 161 162 ## 163 ## Last attempt -- see if it's a standard "To" name 164 ## (e.g. "ToLower") ToTitle is used by ucfirst(). 165 ## The user-level way to access ToDigit() and ToFold() 166 ## is to use Unicode::UCD. 167 ## 168 if ($type =~ /^To(Digit|Fold|Lower|Title|Upper)$/) { 169 $file = "unicore/To/$1.pl"; 170 ## would like to test to see if $file actually exists.... 171 last GETFILE; 172 } 173 174 ## 175 ## If we reach this line, it's because we couldn't figure 176 ## out what to do with $type. Ouch. 177 ## 178 179 return $type; 180 } 181 182 if (defined $file) { 183 print STDERR "found it (file='$file')\n" if DEBUG; 184 185 ## 186 ## If we reach here, it was due to a 'last GETFILE' above 187 ## (exception: user-defined properties and mappings), so we 188 ## have a filename, so now we load it if we haven't already. 189 ## If we have, return the cached results. The cache key is the 190 ## class and file to load. 191 ## 192 my $found = $Cache{$class, $file}; 193 if ($found and ref($found) eq $class) { 194 print STDERR "Returning cached '$file' for \\p{$type}\n" if DEBUG; 195 return $found; 196 } 197 198 $list = do $file; die $@ if $@; 199 } 200 201 $ListSorted = 1; ## we know that these lists are sorted 202 } 203 204 my $extras; 205 my $bits = $minbits; 206 207 my $ORIG = $list; 208 if ($list) { 209 my @tmp = split(/^/m, $list); 210 my %seen; 211 no warnings; 212 $extras = join '', grep /^[^0-9a-fA-F]/, @tmp; 213 $list = join '', 214 map { $_->[1] } 215 sort { $a->[0] <=> $b->[0] } 216 map { /^([0-9a-fA-F]+)/; [ CORE::hex($1), $_ ] } 217 grep { /^([0-9a-fA-F]+)/ and not $seen{$1}++ } @tmp; # XXX doesn't do ranges right 218 } 219 220 if ($none) { 221 my $hextra = sprintf "%04x", $none + 1; 222 $list =~ s/\tXXXX$/\t$hextra/mg; 223 } 224 225 if ($minbits != 1 && $minbits < 32) { # not binary property 226 my $top = 0; 227 while ($list =~ /^([0-9a-fA-F]+)(?:[\t]([0-9a-fA-F]+)?)(?:[ \t]([0-9a-fA-F]+))?/mg) { 228 my $min = CORE::hex $1; 229 my $max = defined $2 ? CORE::hex $2 : $min; 230 my $val = defined $3 ? CORE::hex $3 : 0; 231 $val += $max - $min if defined $3; 232 $top = $val if $val > $top; 233 } 234 my $topbits = 235 $top > 0xffff ? 32 : 236 $top > 0xff ? 16 : 8; 237 $bits = $topbits if $bits < $topbits; 238 } 239 240 my @extras; 241 for my $x ($extras) { 242 pos $x = 0; 243 while ($x =~ /^([^0-9a-fA-F\n])(.*)/mg) { 244 my $char = $1; 245 my $name = $2; 246 print STDERR "$1 => $2\n" if DEBUG; 247 if ($char =~ /[-+!&]/) { 248 my ($c,$t) = split(/::/, $name, 2); # bogus use of ::, really 249 my $subobj; 250 if ($c eq 'utf8') { 251 $subobj = utf8->SWASHNEW($t, "", $minbits, 0); 252 } 253 elsif (exists &$name) { 254 $subobj = utf8->SWASHNEW($name, "", $minbits, 0); 255 } 256 elsif ($c =~ /^([0-9a-fA-F]+)/) { 257 $subobj = utf8->SWASHNEW("", $c, $minbits, 0); 258 } 259 return $subobj unless ref $subobj; 260 push @extras, $name => $subobj; 261 $bits = $subobj->{BITS} if $bits < $subobj->{BITS}; 262 } 263 } 264 } 265 266 print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG; 267 268 my $SWASH = bless { 269 TYPE => $type, 270 BITS => $bits, 271 EXTRAS => $extras, 272 LIST => $list, 273 NONE => $none, 274 @extras, 275 } => $class; 276 277 if ($file) { 278 $Cache{$class, $file} = $SWASH; 279 } 280 281 return $SWASH; 282 } 283 284 # Now SWASHGET is recasted into a C function S_swash_get (see utf8.c). 285 286 1;
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |