[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/pod/ -> Escapes.pm (source)

   1  
   2  require 5;
   3  #                        The documentation is at the end.
   4  # Time-stamp: "2004-05-07 15:31:25 ADT"
   5  package Pod::Escapes;
   6  require Exporter;
   7  @ISA = ('Exporter');
   8  $VERSION = '1.04';
   9  @EXPORT_OK = qw(
  10    %Code2USASCII
  11    %Name2character
  12    %Name2character_number
  13    %Latin1Code_to_fallback
  14    %Latin1Char_to_fallback
  15    e2char
  16    e2charnum
  17  );
  18  %EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
  19  
  20  #==========================================================================
  21  
  22  use strict;
  23  use vars qw(
  24    %Code2USASCII
  25    %Name2character
  26    %Name2character_number
  27    %Latin1Code_to_fallback
  28    %Latin1Char_to_fallback
  29    $FAR_CHAR
  30    $FAR_CHAR_NUMBER
  31    $NOT_ASCII
  32  );
  33  
  34  $FAR_CHAR = "?" unless defined $FAR_CHAR;
  35  $FAR_CHAR_NUMBER = ord($FAR_CHAR) unless defined $FAR_CHAR_NUMBER;
  36  
  37  $NOT_ASCII = 'A' ne chr(65) unless defined $NOT_ASCII;
  38  
  39  #--------------------------------------------------------------------------
  40  sub e2char {
  41    my $in = $_[0];
  42    return undef unless defined $in and length $in;
  43    
  44    # Convert to decimal:
  45    if($in =~ m/^(0[0-7]*)$/s ) {
  46      $in = oct $in;
  47    } elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
  48      $in = hex $1;
  49    } # else it's decimal, or named
  50  
  51    if($NOT_ASCII) {
  52      # We're in bizarro world of not-ASCII!
  53      # Cope with US-ASCII codes, use fallbacks for Latin-1, or use FAR_CHAR.
  54      unless($in =~ m/^\d+$/s) {
  55        # It's a named character reference.  Get its numeric Unicode value.
  56        $in = $Name2character{$in};
  57        return undef unless defined $in;  # (if there's no such name)
  58        $in = ord $in; # (All ents must be one character long.)
  59          # ...So $in holds the char's US-ASCII numeric value, which we'll
  60          #  now go get the local equivalent for.
  61      }
  62  
  63      # It's numeric, whether by origin or by mutation from a known name
  64      return $Code2USASCII{$in} # so "65" => "A" everywhere
  65          || $Latin1Code_to_fallback{$in} # Fallback.
  66          || $FAR_CHAR; # Fall further back
  67    }
  68    
  69    # Normal handling:
  70    if($in =~ m/^\d+$/s) {
  71      if($] < 5.007  and  $in > 255) { # can't be trusted with Unicode
  72        return $FAR_CHAR;
  73      } else {
  74        return chr($in);
  75      }
  76    } else {
  77      return $Name2character{$in}; # returns undef if unknown
  78    }
  79  }
  80  
  81  #--------------------------------------------------------------------------
  82  sub e2charnum {
  83    my $in = $_[0];
  84    return undef unless defined $in and length $in;
  85    
  86    # Convert to decimal:
  87    if($in =~ m/^(0[0-7]*)$/s ) {
  88      $in = oct $in;
  89    } elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
  90      $in = hex $1;
  91    } # else it's decimal, or named
  92  
  93    if($in =~ m/^\d+$/s) {
  94      return 0 + $in;
  95    } else {
  96      return $Name2character_number{$in}; # returns undef if unknown
  97    }
  98  }
  99  
 100  #--------------------------------------------------------------------------
 101  
 102  %Name2character_number = (
 103   # General XML/XHTML:
 104   'lt'   => 60,
 105   'gt'   => 62,
 106   'quot' => 34,
 107   'amp'  => 38,
 108   'apos' => 39,
 109  
 110   # POD-specific:
 111   'sol'    => 47,
 112   'verbar' => 124,
 113  
 114   'lchevron' => 171, # legacy for laquo
 115   'rchevron' => 187, # legacy for raquo
 116  
 117   # Remember, grave looks like \ (as in virtu\)
 118   #           acute looks like / (as in re/sume/)
 119   #           circumflex looks like ^ (as in papier ma^che/)
 120   #           umlaut/dieresis looks like " (as in nai"ve, Chloe")
 121  
 122   # From the XHTML 1 .ent files:
 123   'nbsp'     , 160,
 124   'iexcl'    , 161,
 125   'cent'     , 162,
 126   'pound'    , 163,
 127   'curren'   , 164,
 128   'yen'      , 165,
 129   'brvbar'   , 166,
 130   'sect'     , 167,
 131   'uml'      , 168,
 132   'copy'     , 169,
 133   'ordf'     , 170,
 134   'laquo'    , 171,
 135   'not'      , 172,
 136   'shy'      , 173,
 137   'reg'      , 174,
 138   'macr'     , 175,
 139   'deg'      , 176,
 140   'plusmn'   , 177,
 141   'sup2'     , 178,
 142   'sup3'     , 179,
 143   'acute'    , 180,
 144   'micro'    , 181,
 145   'para'     , 182,
 146   'middot'   , 183,
 147   'cedil'    , 184,
 148   'sup1'     , 185,
 149   'ordm'     , 186,
 150   'raquo'    , 187,
 151   'frac14'   , 188,
 152   'frac12'   , 189,
 153   'frac34'   , 190,
 154   'iquest'   , 191,
 155   'Agrave'   , 192,
 156   'Aacute'   , 193,
 157   'Acirc'    , 194,
 158   'Atilde'   , 195,
 159   'Auml'     , 196,
 160   'Aring'    , 197,
 161   'AElig'    , 198,
 162   'Ccedil'   , 199,
 163   'Egrave'   , 200,
 164   'Eacute'   , 201,
 165   'Ecirc'    , 202,
 166   'Euml'     , 203,
 167   'Igrave'   , 204,
 168   'Iacute'   , 205,
 169   'Icirc'    , 206,
 170   'Iuml'     , 207,
 171   'ETH'      , 208,
 172   'Ntilde'   , 209,
 173   'Ograve'   , 210,
 174   'Oacute'   , 211,
 175   'Ocirc'    , 212,
 176   'Otilde'   , 213,
 177   'Ouml'     , 214,
 178   'times'    , 215,
 179   'Oslash'   , 216,
 180   'Ugrave'   , 217,
 181   'Uacute'   , 218,
 182   'Ucirc'    , 219,
 183   'Uuml'     , 220,
 184   'Yacute'   , 221,
 185   'THORN'    , 222,
 186   'szlig'    , 223,
 187   'agrave'   , 224,
 188   'aacute'   , 225,
 189   'acirc'    , 226,
 190   'atilde'   , 227,
 191   'auml'     , 228,
 192   'aring'    , 229,
 193   'aelig'    , 230,
 194   'ccedil'   , 231,
 195   'egrave'   , 232,
 196   'eacute'   , 233,
 197   'ecirc'    , 234,
 198   'euml'     , 235,
 199   'igrave'   , 236,
 200   'iacute'   , 237,
 201   'icirc'    , 238,
 202   'iuml'     , 239,
 203   'eth'      , 240,
 204   'ntilde'   , 241,
 205   'ograve'   , 242,
 206   'oacute'   , 243,
 207   'ocirc'    , 244,
 208   'otilde'   , 245,
 209   'ouml'     , 246,
 210   'divide'   , 247,
 211   'oslash'   , 248,
 212   'ugrave'   , 249,
 213   'uacute'   , 250,
 214   'ucirc'    , 251,
 215   'uuml'     , 252,
 216   'yacute'   , 253,
 217   'thorn'    , 254,
 218   'yuml'     , 255,
 219  
 220   'fnof'     , 402,
 221   'Alpha'    , 913,
 222   'Beta'     , 914,
 223   'Gamma'    , 915,
 224   'Delta'    , 916,
 225   'Epsilon'  , 917,
 226   'Zeta'     , 918,
 227   'Eta'      , 919,
 228   'Theta'    , 920,
 229   'Iota'     , 921,
 230   'Kappa'    , 922,
 231   'Lambda'   , 923,
 232   'Mu'       , 924,
 233   'Nu'       , 925,
 234   'Xi'       , 926,
 235   'Omicron'  , 927,
 236   'Pi'       , 928,
 237   'Rho'      , 929,
 238   'Sigma'    , 931,
 239   'Tau'      , 932,
 240   'Upsilon'  , 933,
 241   'Phi'      , 934,
 242   'Chi'      , 935,
 243   'Psi'      , 936,
 244   'Omega'    , 937,
 245   'alpha'    , 945,
 246   'beta'     , 946,
 247   'gamma'    , 947,
 248   'delta'    , 948,
 249   'epsilon'  , 949,
 250   'zeta'     , 950,
 251   'eta'      , 951,
 252   'theta'    , 952,
 253   'iota'     , 953,
 254   'kappa'    , 954,
 255   'lambda'   , 955,
 256   'mu'       , 956,
 257   'nu'       , 957,
 258   'xi'       , 958,
 259   'omicron'  , 959,
 260   'pi'       , 960,
 261   'rho'      , 961,
 262   'sigmaf'   , 962,
 263   'sigma'    , 963,
 264   'tau'      , 964,
 265   'upsilon'  , 965,
 266   'phi'      , 966,
 267   'chi'      , 967,
 268   'psi'      , 968,
 269   'omega'    , 969,
 270   'thetasym' , 977,
 271   'upsih'    , 978,
 272   'piv'      , 982,
 273   'bull'     , 8226,
 274   'hellip'   , 8230,
 275   'prime'    , 8242,
 276   'Prime'    , 8243,
 277   'oline'    , 8254,
 278   'frasl'    , 8260,
 279   'weierp'   , 8472,
 280   'image'    , 8465,
 281   'real'     , 8476,
 282   'trade'    , 8482,
 283   'alefsym'  , 8501,
 284   'larr'     , 8592,
 285   'uarr'     , 8593,
 286   'rarr'     , 8594,
 287   'darr'     , 8595,
 288   'harr'     , 8596,
 289   'crarr'    , 8629,
 290   'lArr'     , 8656,
 291   'uArr'     , 8657,
 292   'rArr'     , 8658,
 293   'dArr'     , 8659,
 294   'hArr'     , 8660,
 295   'forall'   , 8704,
 296   'part'     , 8706,
 297   'exist'    , 8707,
 298   'empty'    , 8709,
 299   'nabla'    , 8711,
 300   'isin'     , 8712,
 301   'notin'    , 8713,
 302   'ni'       , 8715,
 303   'prod'     , 8719,
 304   'sum'      , 8721,
 305   'minus'    , 8722,
 306   'lowast'   , 8727,
 307   'radic'    , 8730,
 308   'prop'     , 8733,
 309   'infin'    , 8734,
 310   'ang'      , 8736,
 311   'and'      , 8743,
 312   'or'       , 8744,
 313   'cap'      , 8745,
 314   'cup'      , 8746,
 315   'int'      , 8747,
 316   'there4'   , 8756,
 317   'sim'      , 8764,
 318   'cong'     , 8773,
 319   'asymp'    , 8776,
 320   'ne'       , 8800,
 321   'equiv'    , 8801,
 322   'le'       , 8804,
 323   'ge'       , 8805,
 324   'sub'      , 8834,
 325   'sup'      , 8835,
 326   'nsub'     , 8836,
 327   'sube'     , 8838,
 328   'supe'     , 8839,
 329   'oplus'    , 8853,
 330   'otimes'   , 8855,
 331   'perp'     , 8869,
 332   'sdot'     , 8901,
 333   'lceil'    , 8968,
 334   'rceil'    , 8969,
 335   'lfloor'   , 8970,
 336   'rfloor'   , 8971,
 337   'lang'     , 9001,
 338   'rang'     , 9002,
 339   'loz'      , 9674,
 340   'spades'   , 9824,
 341   'clubs'    , 9827,
 342   'hearts'   , 9829,
 343   'diams'    , 9830,
 344   'OElig'    , 338,
 345   'oelig'    , 339,
 346   'Scaron'   , 352,
 347   'scaron'   , 353,
 348   'Yuml'     , 376,
 349   'circ'     , 710,
 350   'tilde'    , 732,
 351   'ensp'     , 8194,
 352   'emsp'     , 8195,
 353   'thinsp'   , 8201,
 354   'zwnj'     , 8204,
 355   'zwj'      , 8205,
 356   'lrm'      , 8206,
 357   'rlm'      , 8207,
 358   'ndash'    , 8211,
 359   'mdash'    , 8212,
 360   'lsquo'    , 8216,
 361   'rsquo'    , 8217,
 362   'sbquo'    , 8218,
 363   'ldquo'    , 8220,
 364   'rdquo'    , 8221,
 365   'bdquo'    , 8222,
 366   'dagger'   , 8224,
 367   'Dagger'   , 8225,
 368   'permil'   , 8240,
 369   'lsaquo'   , 8249,
 370   'rsaquo'   , 8250,
 371   'euro'     , 8364,
 372  );
 373  
 374  
 375  # Fill out %Name2character...
 376  {
 377    %Name2character = ();
 378    my($name, $number);
 379    while( ($name, $number) = each %Name2character_number) {
 380      if($] < 5.007  and  $number > 255) {
 381        $Name2character{$name} = $FAR_CHAR;
 382        # substitute for Unicode characters, for perls
 383        #  that can't reliable handle them
 384      } else {
 385        $Name2character{$name} = chr $number;
 386        # normal case
 387      }
 388    }
 389    # So they resolve 'right' even in EBCDIC-land
 390    $Name2character{'lt'  }   = '<';
 391    $Name2character{'gt'  }   = '>';
 392    $Name2character{'quot'}   = '"';
 393    $Name2character{'amp' }   = '&';
 394    $Name2character{'apos'}   = "'";
 395    $Name2character{'sol' }   = '/';
 396    $Name2character{'verbar'} = '|';
 397  }
 398  
 399  #--------------------------------------------------------------------------
 400  
 401  %Code2USASCII = (
 402  # mostly generated by
 403  #  perl -e "printf qq{  \x25 3s, '\x25s',\n}, $_, chr($_) foreach (32 .. 126)"
 404     32, ' ',
 405     33, '!',
 406     34, '"',
 407     35, '#',
 408     36, '$',
 409     37, '%',
 410     38, '&',
 411     39, "'", #!
 412     40, '(',
 413     41, ')',
 414     42, '*',
 415     43, '+',
 416     44, ',',
 417     45, '-',
 418     46, '.',
 419     47, '/',
 420     48, '0',
 421     49, '1',
 422     50, '2',
 423     51, '3',
 424     52, '4',
 425     53, '5',
 426     54, '6',
 427     55, '7',
 428     56, '8',
 429     57, '9',
 430     58, ':',
 431     59, ';',
 432     60, '<',
 433     61, '=',
 434     62, '>',
 435     63, '?',
 436     64, '@',
 437     65, 'A',
 438     66, 'B',
 439     67, 'C',
 440     68, 'D',
 441     69, 'E',
 442     70, 'F',
 443     71, 'G',
 444     72, 'H',
 445     73, 'I',
 446     74, 'J',
 447     75, 'K',
 448     76, 'L',
 449     77, 'M',
 450     78, 'N',
 451     79, 'O',
 452     80, 'P',
 453     81, 'Q',
 454     82, 'R',
 455     83, 'S',
 456     84, 'T',
 457     85, 'U',
 458     86, 'V',
 459     87, 'W',
 460     88, 'X',
 461     89, 'Y',
 462     90, 'Z',
 463     91, '[',
 464     92, "\\", #!
 465     93, ']',
 466     94, '^',
 467     95, '_',
 468     96, '`',
 469     97, 'a',
 470     98, 'b',
 471     99, 'c',
 472    100, 'd',
 473    101, 'e',
 474    102, 'f',
 475    103, 'g',
 476    104, 'h',
 477    105, 'i',
 478    106, 'j',
 479    107, 'k',
 480    108, 'l',
 481    109, 'm',
 482    110, 'n',
 483    111, 'o',
 484    112, 'p',
 485    113, 'q',
 486    114, 'r',
 487    115, 's',
 488    116, 't',
 489    117, 'u',
 490    118, 'v',
 491    119, 'w',
 492    120, 'x',
 493    121, 'y',
 494    122, 'z',
 495    123, '{',
 496    124, '|',
 497    125, '}',
 498    126, '~',
 499  );
 500  
 501  #--------------------------------------------------------------------------
 502  
 503  %Latin1Code_to_fallback = ();
 504  @Latin1Code_to_fallback{0xA0 .. 0xFF} = (
 505  # Copied from Text/Unidecode/x00.pm:
 506  
 507  ' ', qq{!}, qq{C/}, 'PS', qq{\$?}, qq{Y=}, qq{|}, 'SS', qq{"}, qq{(c)}, 'a', qq{<<}, qq{!}, "", qq{(r)}, qq{-},
 508  'deg', qq{+-}, '2', '3', qq{'}, 'u', 'P', qq{*}, qq{,}, '1', 'o', qq{>>}, qq{1/4}, qq{1/2}, qq{3/4}, qq{?},
 509  'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
 510  'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'U', 'Th', 'ss',
 511  'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
 512  'd', 'n', 'o', 'o', 'o', 'o', 'o', qq{/}, 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y',
 513  
 514  );
 515  
 516  {
 517    # Now stuff %Latin1Char_to_fallback:
 518    %Latin1Char_to_fallback = ();
 519    my($k,$v);
 520    while( ($k,$v) = each %Latin1Code_to_fallback) {
 521      $Latin1Char_to_fallback{chr $k} = $v;
 522      #print chr($k), ' => ', $v, "\n";
 523    }
 524  }
 525  
 526  #--------------------------------------------------------------------------
 527  1;
 528  __END__
 529  
 530  =head1 NAME
 531  
 532  Pod::Escapes -- for resolving Pod EE<lt>...E<gt> sequences
 533  
 534  =head1 SYNOPSIS
 535  
 536    use Pod::Escapes qw(e2char);
 537    ...la la la, parsing POD, la la la...
 538    $text = e2char($e_node->label);
 539    unless(defined $text) {
 540      print "Unknown E sequence \"", $e_node->label, "\"!";
 541    }
 542    ...else print/interpolate $text...
 543  
 544  =head1 DESCRIPTION
 545  
 546  This module provides things that are useful in decoding
 547  Pod EE<lt>...E<gt> sequences.  Presumably, it should be used
 548  only by Pod parsers and/or formatters.
 549  
 550  By default, Pod::Escapes exports none of its symbols.  But
 551  you can request any of them to be exported.
 552  Either request them individually, as with
 553  C<use Pod::Escapes qw(symbolname symbolname2...);>,
 554  or you can do C<use Pod::Escapes qw(:ALL);> to get all
 555  exportable symbols.
 556  
 557  =head1 GOODIES
 558  
 559  =over
 560  
 561  =item e2char($e_content)
 562  
 563  Given a name or number that could appear in a
 564  C<EE<lt>name_or_numE<gt>> sequence, this returns the string that
 565  it stands for.  For example, C<e2char('sol')>, C<e2char('47')>,
 566  C<e2char('0x2F')>, and C<e2char('057')> all return "/",
 567  because C<EE<lt>solE<gt>>, C<EE<lt>47E<gt>>, C<EE<lt>0x2fE<gt>>,
 568  and C<EE<lt>057E<gt>>, all mean "/".  If
 569  the name has no known value (as with a name of "qacute") or is
 570  syntactally invalid (as with a name of "1/4"), this returns undef.
 571  
 572  =item e2charnum($e_content)
 573  
 574  Given a name or number that could appear in a
 575  C<EE<lt>name_or_numE<gt>> sequence, this returns the number of
 576  the Unicode character that this stands for.  For example,
 577  C<e2char('sol')>, C<e2char('47')>,
 578  C<e2char('0x2F')>, and C<e2char('057')> all return 47,
 579  because C<EE<lt>solE<gt>>, C<EE<lt>47E<gt>>, C<EE<lt>0x2fE<gt>>,
 580  and C<EE<lt>057E<gt>>, all mean "/", whose Unicode number is 47.  If
 581  the name has no known value (as with a name of "qacute") or is
 582  syntactally invalid (as with a name of "1/4"), this returns undef.
 583  
 584  =item $Name2character{I<name>}
 585  
 586  Maps from names (as in C<EE<lt>I<name>E<gt>>) like "eacute" or "sol"
 587  to the string that each stands for.  Note that this does not
 588  include numerics (like "64" or "x981c").  Under old Perl versions
 589  (before 5.7) you get a "?" in place of characters whose Unicode
 590  value is over 255.
 591  
 592  =item $Name2character_number{I<name>}
 593  
 594  Maps from names (as in C<EE<lt>I<name>E<gt>>) like "eacute" or "sol"
 595  to the Unicode value that each stands for.  For example,
 596  C<$Name2character_number{'eacute'}> is 201, and
 597  C<$Name2character_number{'eacute'}> is 8364.  You get the correct
 598  Unicode value, regardless of the version of Perl you're using --
 599  which differs from C<%Name2character>'s behavior under pre-5.7 Perls.
 600  
 601  Note that this hash does not
 602  include numerics (like "64" or "x981c").
 603  
 604  =item $Latin1Code_to_fallback{I<integer>}
 605  
 606  For numbers in the range 160 (0x00A0) to 255 (0x00FF), this maps
 607  from the character code for a Latin-1 character (like 233 for
 608  lowercase e-acute) to the US-ASCII character that best aproximates
 609  it (like "e").  You may find this useful if you are rendering
 610  POD in a format that you think deals well only with US-ASCII
 611  characters.
 612  
 613  =item $Latin1Char_to_fallback{I<character>}
 614  
 615  Just as above, but maps from characters (like "\xE9", 
 616  lowercase e-acute) to characters (like "e").
 617  
 618  =item $Code2USASCII{I<integer>}
 619  
 620  This maps from US-ASCII codes (like 32) to the corresponding
 621  character (like space, for 32).  Only characters 32 to 126 are
 622  defined.  This is meant for use by C<e2char($x)> when it senses
 623  that it's running on a non-ASCII platform (where chr(32) doesn't
 624  get you a space -- but $Code2USASCII{32} will).  It's
 625  documented here just in case you might find it useful.
 626  
 627  =back
 628  
 629  =head1 CAVEATS
 630  
 631  On Perl versions before 5.7, Unicode characters with a value
 632  over 255 (like lambda or emdash) can't be conveyed.  This
 633  module does work under such early Perl versions, but in the
 634  place of each such character, you get a "?".  Latin-1
 635  characters (characters 160-255) are unaffected.
 636  
 637  Under EBCDIC platforms, C<e2char($n)> may not always be the
 638  same as C<chr(e2charnum($n))>, and ditto for
 639  C<$Name2character{$name}> and
 640  C<chr($Name2character_number{$name})>.
 641  
 642  =head1 SEE ALSO
 643  
 644  L<perlpod|perlpod>
 645  
 646  L<perlpodspec|perlpodspec>
 647  
 648  L<Text::Unidecode|Text::Unidecode>
 649  
 650  =head1 COPYRIGHT AND DISCLAIMERS
 651  
 652  Copyright (c) 2001-2004 Sean M. Burke.  All rights reserved.
 653  
 654  This library is free software; you can redistribute it and/or modify
 655  it under the same terms as Perl itself.
 656  
 657  This program is distributed in the hope that it will be useful, but
 658  without any warranty; without even the implied warranty of
 659  merchantability or fitness for a particular purpose.
 660  
 661  Portions of the data tables in this module are derived from the
 662  entity declarations in the W3C XHTML specification.
 663  
 664  Currently (October 2001), that's these three:
 665  
 666   http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
 667   http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
 668   http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
 669  
 670  =head1 AUTHOR
 671  
 672  Sean M. Burke C<sburke@cpan.org>
 673  
 674  =cut
 675  
 676  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 677  # What I used for reading the XHTML .ent files:
 678  
 679  use strict;
 680  my(@norms, @good, @bad);
 681  my $dir = 'c:/sgml/docbook/';
 682  my %escapes;
 683  foreach my $file (qw(
 684    xhtml-symbol.ent
 685    xhtml-lat1.ent
 686    xhtml-special.ent
 687  )) {
 688    open(IN, "<$dir$file") or die "can't read-open $dir$file: $!";
 689    print "Reading $file...\n";
 690    while(<IN>) {
 691      if(m/<!ENTITY\s+(\S+)\s+"&#([^;]+);">/) {
 692        my($name, $value) = ($1,$2);
 693        next if $name eq 'quot' or $name eq 'apos' or $name eq 'gt';
 694      
 695        $value = hex $1 if $value =~ m/^x([a-fA-F0-9]+)$/s;
 696        print "ILLEGAL VALUE $value" unless $value =~ m/^\d+$/s;
 697        if($value > 255) {
 698          push @good , sprintf "   %-10s , chr(%s),\n", "'$name'", $value;
 699          push @bad  , sprintf "   %-10s , \$bad,\n", "'$name'", $value;
 700        } else {
 701          push @norms, sprintf " %-10s , chr(%s),\n", "'$name'", $value;
 702        }
 703      } elsif(m/<!ENT/) {
 704        print "# Skipping $_";
 705      }
 706    
 707    }
 708    close(IN);
 709  }
 710  
 711  print @norms;
 712  print "\n ( \$] .= 5.006001 ? (\n";
 713  print @good;
 714  print " ) : (\n";
 715  print @bad;
 716  print " )\n);\n";
 717  
 718  __END__
 719  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 720  
 721  


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1