[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 # $File: //member/autrijus/.vimrc $ $Author: autrijus $ 2 # $Revision: #1 $ $Change: 1649 $ $DateTime: 2002/10/24 15:21:23 $ 3 4 package Encode::compat::Alias; 5 our $VERSION = '0.05'; 6 7 1; 8 9 package Encode::Alias; 10 use strict; 11 our $VERSION = '0.05'; 12 our $DEBUG = 0; 13 14 use base qw(Exporter); 15 16 # Public, encouraged API is exported by default 17 18 our @EXPORT = 19 qw ( 20 define_alias 21 find_alias 22 ); 23 24 our @Alias; # ordered matching list 25 our %Alias; # cached known aliases 26 27 sub find_alias 28 { 29 my $class = shift; 30 local $_ = shift; 31 unless (exists $Alias{$_}) 32 { 33 $Alias{$_} = undef; # Recursion guard 34 for (my $i=0; $i < @Alias; $i += 2) 35 { 36 my $alias = $Alias[$i]; 37 my $val = $Alias[$i+1]; 38 my $new; 39 if (ref($alias) eq 'Regexp' && $_ =~ $alias) 40 { 41 $DEBUG and warn "eval $val"; 42 $new = eval $val; 43 # $@ and warn "$val, $@"; 44 } 45 elsif (ref($alias) eq 'CODE') 46 { 47 $DEBUG and warn "$alias", "->", "($val)"; 48 $new = $alias->($val); 49 } 50 elsif (lc($_) eq lc($alias)) 51 { 52 $new = $val; 53 } 54 if (defined($new)) 55 { 56 next if $new eq $_; # avoid (direct) recursion on bugs 57 $DEBUG and warn "$alias, $new"; 58 my $enc = (ref($new)) ? $new : Encode::find_encoding($new); 59 if ($enc) 60 { 61 $Alias{$_} = $enc; 62 last; 63 } 64 } 65 } 66 } 67 if ($DEBUG){ 68 my $name; 69 if (my $e = $Alias{$_}){ 70 $name = $e->name; 71 }else{ 72 $name = ""; 73 } 74 warn "find_alias($class, $_)->name = $name"; 75 } 76 return $Alias{$_}; 77 } 78 79 sub define_alias 80 { 81 while (@_) 82 { 83 my ($alias,$name) = splice(@_,0,2); 84 unshift(@Alias, $alias => $name); # newer one has precedence 85 # clear %Alias cache to allow overrides 86 if (ref($alias)){ 87 my @a = keys %Alias; 88 for my $k (@a){ 89 if (ref($alias) eq 'Regexp' && $k =~ $alias) 90 { 91 $DEBUG and warn "delete \$Alias\{$k\}"; 92 delete $Alias{$k}; 93 } 94 elsif (ref($alias) eq 'CODE') 95 { 96 $DEBUG and warn "delete \$Alias\{$k\}"; 97 delete $Alias{$alias->($name)}; 98 } 99 } 100 }else{ 101 $DEBUG and warn "delete \$Alias\{$alias\}"; 102 delete $Alias{$alias}; 103 } 104 } 105 } 106 107 # Allow latin-1 style names as well 108 # 0 1 2 3 4 5 6 7 8 9 10 109 our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); 110 # Allow winlatin1 style names as well 111 our %Winlatin2cp = ( 112 'latin1' => 1252, 113 'latin2' => 1250, 114 'cyrillic' => 1251, 115 'greek' => 1253, 116 'turkish' => 1254, 117 'hebrew' => 1255, 118 'arabic' => 1256, 119 'baltic' => 1257, 120 'vietnamese' => 1258, 121 ); 122 123 init_aliases(); 124 125 sub undef_aliases{ 126 @Alias = (); 127 %Alias = (); 128 } 129 130 sub init_aliases 131 { 132 undef_aliases(); 133 134 # Try all-lower-case version should all else fails 135 define_alias( qr/^(.*)$/ => '"\L$1"' ); 136 137 # UTF/UCS stuff 138 define_alias( qr/^UCS-?2-?LE$/i => '"UCS-2LE"' ); 139 define_alias( qr/^UCS-?2-?(BE)?$/i => '"UCS-2BE"', 140 qr/^UCS-?4-?(BE|LE)?$/i => 'uc("UTF-32$1")', 141 qr/^iso-10646-1$/i => '"UCS-2BE"' ); 142 define_alias( qr/^UTF(16|32)-?BE$/i => '"UTF-$1BE"', 143 qr/^UTF(16|32)-?LE$/i => '"UTF-$1LE"', 144 qr/^UTF(16|32)$/i => '"UTF-$1"', 145 ); 146 # ASCII 147 define_alias(qr/^(?:US-?)ascii$/i => '"ascii"'); 148 define_alias('C' => 'ascii'); 149 define_alias(qr/\bISO[-_]?646[-_]?US$/i => '"ascii"'); 150 # Allow variants of iso-8859-1 etc. 151 define_alias( qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); 152 153 # At least HP-UX has these. 154 define_alias( qr/\biso8859(\d+)$/i => '"iso-8859-$1"' ); 155 156 # More HP stuff. 157 define_alias( qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i => '"$1}8"' ); 158 159 # The Official name of ASCII. 160 define_alias( qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"' ); 161 162 # This is a font issue, not an encoding issue. 163 # (The currency symbol of the Latin 1 upper half 164 # has been redefined as the euro symbol.) 165 define_alias( qr/^(.+)\@euro$/i => '"$1"' ); 166 167 define_alias( qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i 168 => 'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef' ); 169 170 define_alias( qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish| 171 hebrew|arabic|baltic|vietnamese)$/ix => 172 '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}' ); 173 174 # Common names for non-latin prefered MIME names 175 define_alias( 'ascii' => 'US-ascii', 176 'cyrillic' => 'iso-8859-5', 177 'arabic' => 'iso-8859-6', 178 'greek' => 'iso-8859-7', 179 'hebrew' => 'iso-8859-8', 180 'thai' => 'iso-8859-11', 181 'tis620' => 'iso-8859-11', 182 ); 183 184 # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. 185 # And Microsoft has their own naming (again, surprisingly). 186 # And windows-* is registered in IANA! 187 define_alias( qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"'); 188 189 # Sometimes seen with a leading zero. 190 # define_alias( qr/\bcp037\b/i => '"cp37"'); 191 192 # Mac Mappings 193 # predefined in *.ucm; unneeded 194 # define_alias( qr/\bmacIcelandic$/i => '"macIceland"'); 195 define_alias( qr/^mac_(.*)$/i => '"mac$1"'); 196 # Ououououou. gone. They are differente! 197 # define_alias( qr/\bmacRomanian$/i => '"macRumanian"'); 198 199 # Standardize on the dashed versions. 200 # define_alias( qr/\butf8$/i => 'utf-8' ); 201 define_alias( qr/\bkoi8r$/i => 'koi8-r' ); 202 define_alias( qr/\bkoi8u$/i => 'koi8-u' ); 203 204 unless ($Encode::ON_EBCDIC){ 205 # for Encode::CN 206 define_alias( qr/\beuc.*cn$/i => '"euc-cn"' ); 207 define_alias( qr/\bcn.*euc$/i => '"euc-cn"' ); 208 # define_alias( qr/\bGB[- ]?(\d+)$/i => '"euc-cn"' ) 209 # CP936 doesn't have vendor-addon for GBK, so they're identical. 210 define_alias( qr/^gbk$/i => '"cp936"'); 211 # This fixes gb2312 vs. euc-cn confusion, practically 212 define_alias( qr/\bGB[-_ ]?2312(?:\D.*$|$)/i => '"euc-cn"' ); 213 # for Encode::JP 214 define_alias( qr/\bjis$/i => '"7bit-jis"' ); 215 define_alias( qr/\beuc.*jp$/i => '"euc-jp"' ); 216 define_alias( qr/\bjp.*euc$/i => '"euc-jp"' ); 217 define_alias( qr/\bujis$/i => '"euc-jp"' ); 218 define_alias( qr/\bshift.*jis$/i => '"shiftjis"' ); 219 define_alias( qr/\bsjis$/i => '"shiftjis"' ); 220 # for Encode::KR 221 define_alias( qr/\beuc.*kr$/i => '"euc-kr"' ); 222 define_alias( qr/\bkr.*euc$/i => '"euc-kr"' ); 223 # This fixes ksc5601 vs. euc-kr confusion, practically 224 define_alias( qr/(?:x-)?uhc$/i => '"cp949"' ); 225 define_alias( qr/(?:x-)?windows-949$/i => '"cp949"' ); 226 define_alias( qr/\bks_c_5601-1987$/i => '"cp949"' ); 227 # for Encode::TW 228 define_alias( qr/\bbig-?5$/i => '"big5-eten"' ); 229 define_alias( qr/\bbig5-?et(?:en)?$/i => '"big5-eten"' ); 230 define_alias( qr/\btca[-_]?big5$/i => '"big5-eten"' ); 231 define_alias( qr/\bbig5-?hk(?:scs)?$/i => '"big5-hkscs"' ); 232 define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' ); 233 } 234 # utf8 is blessed :) 235 define_alias( qr/^UTF-8$/i => '"utf8"',); 236 # At last, Map white space and _ to '-' 237 define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); 238 } 239 240 1; 241 __END__ 242 243 # TODO: HP-UX '8' encodings arabic8 greek8 hebrew8 kana8 thai8 turkish8 244 # TODO: HP-UX '15' encodings japanese15 korean15 roi15 245 # TODO: Cyrillic encoding ISO-IR-111 (useful?) 246 # TODO: Armenian encoding ARMSCII-8 247 # TODO: Hebrew encoding ISO-8859-8-1 248 # TODO: Thai encoding TCVN 249 # TODO: Vietnamese encodings VPS 250 # TODO: Mac Asian+African encodings: Arabic Armenian Bengali Burmese 251 # ChineseSimp ChineseTrad Devanagari Ethiopic ExtArabic 252 # Farsi Georgian Gujarati Gurmukhi Hebrew Japanese 253 # Kannada Khmer Korean Laotian Malayalam Mongolian 254 # Oriya Sinhalese Symbol Tamil Telugu Tibetan Vietnamese 255 256 =head1 NAME 257 258 Encode::Alias - alias definitions to encodings 259 260 =head1 SYNOPSIS 261 262 use Encode; 263 use Encode::Alias; 264 define_alias( newName => ENCODING); 265 266 =head1 DESCRIPTION 267 268 Allows newName to be used as an alias for ENCODING. ENCODING may be 269 either the name of an encoding or an encoding object (as described 270 in L<Encode>). 271 272 Currently I<newName> can be specified in the following ways: 273 274 =over 4 275 276 =item As a simple string. 277 278 =item As a qr// compiled regular expression, e.g.: 279 280 define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' ); 281 282 In this case, if I<ENCODING> is not a reference, it is C<eval>-ed 283 in order to allow C<$1> etc. to be substituted. The example is one 284 way to alias names as used in X11 fonts to the MIME names for the 285 iso-8859-* family. Note the double quotes inside the single quotes. 286 287 If you are using a regex here, you have to use the quotes as shown or 288 it won't work. Also note that regex handling is tricky even for the 289 experienced. Use it with caution. 290 291 =item As a code reference, e.g.: 292 293 define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , ''); 294 295 In this case, C<$_> will be set to the name that is being looked up and 296 I<ENCODING> is passed to the sub as its first argument. The example 297 is another way to alias names as used in X11 fonts to the MIME names 298 for the iso-8859-* family. 299 300 =back 301 302 =head2 Alias overloading 303 304 You can override predefined aliases by simply applying define_alias(). 305 The new alias is always evaluated first, and when neccessary, 306 define_alias() flushes the internal cache to make the new definition 307 available. 308 309 # redirect SHIFT_JIS to MS/IBM Code Page 932, which is a 310 # superset of SHIFT_JIS 311 312 define_alias( qr/shift.*jis$/i => '"cp932"' ); 313 define_alias( qr/sjis$/i => '"cp932"' ); 314 315 If you want to zap all predefined aliases, you can use 316 317 Encode::Alias->undef_aliases; 318 319 to do so. And 320 321 Encode::Alias->init_aliases; 322 323 gets the factory settings back. 324 325 =head1 SEE ALSO 326 327 L<Encode>, L<Encode::Supported> 328 329 =cut 330
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |