[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 2 require 5; 3 package Pod::Simple; 4 use strict; 5 use Carp (); 6 BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } 7 use integer; 8 use Pod::Escapes 1.03 (); 9 use Pod::Simple::LinkSection (); 10 use Pod::Simple::BlackBox (); 11 #use utf8; 12 13 use vars qw( 14 $VERSION @ISA 15 @Known_formatting_codes @Known_directives 16 %Known_formatting_codes %Known_directives 17 $NL 18 ); 19 20 @ISA = ('Pod::Simple::BlackBox'); 21 $VERSION = '3.05'; 22 23 @Known_formatting_codes = qw(I B C L E F S X Z); 24 %Known_formatting_codes = map(($_=>1), @Known_formatting_codes); 25 @Known_directives = qw(head1 head2 head3 head4 item over back); 26 %Known_directives = map(($_=>'Plain'), @Known_directives); 27 $NL = $/ unless defined $NL; 28 29 #----------------------------------------------------------------------------- 30 # Set up some constants: 31 32 BEGIN { 33 if(defined &ASCII) { } 34 elsif(chr(65) eq 'A') { *ASCII = sub () {1} } 35 else { *ASCII = sub () {''} } 36 37 unless(defined &MANY_LINES) { *MANY_LINES = sub () {20} } 38 DEBUG > 4 and print "MANY_LINES is ", MANY_LINES(), "\n"; 39 unless(MANY_LINES() >= 1) { 40 die "MANY_LINES is too small (", MANY_LINES(), ")!\nAborting"; 41 } 42 if(defined &UNICODE) { } 43 elsif($] >= 5.008) { *UNICODE = sub() {1} } 44 else { *UNICODE = sub() {''} } 45 } 46 if(DEBUG > 2) { 47 print "# We are ", ASCII ? '' : 'not ', "in ASCII-land\n"; 48 print "# We are under a Unicode-safe Perl.\n"; 49 } 50 51 # Design note: 52 # This is a parser for Pod. It is not a parser for the set of Pod-like 53 # languages which happens to contain Pod -- it is just for Pod, plus possibly 54 # some extensions. 55 56 # @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 57 #@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ 58 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 59 60 __PACKAGE__->_accessorize( 61 'nbsp_for_S', # Whether to map S<...>'s to \xA0 characters 62 'source_filename', # Filename of the source, for use in warnings 63 'source_dead', # Whether to consider this parser's source dead 64 65 'output_fh', # The filehandle we're writing to, if applicable. 66 # Used only in some derived classes. 67 68 'hide_line_numbers', # For some dumping subclasses: whether to pointedly 69 # suppress the start_line attribute 70 71 'line_count', # the current line number 72 'pod_para_count', # count of pod paragraphs seen so far 73 74 'no_whining', # whether to suppress whining 75 'no_errata_section', # whether to suppress the errata section 76 'complain_stderr', # whether to complain to stderr 77 78 'doc_has_started', # whether we've fired the open-Document event yet 79 80 'bare_output', # For some subclasses: whether to prepend 81 # header-code and postpend footer-code 82 83 'fullstop_space_harden', # Whether to turn ". " into ".[nbsp] "; 84 85 'nix_X_codes', # whether to ignore X<...> codes 86 'merge_text', # whether to avoid breaking a single piece of 87 # text up into several events 88 89 'preserve_whitespace', # whether to try to keep whitespace as-is 90 91 'content_seen', # whether we've seen any real Pod content 92 'errors_seen', # TODO: document. whether we've seen any errors (fatal or not) 93 94 'codes_in_verbatim', # for PseudoPod extensions 95 96 'code_handler', # coderef to call when a code (non-pod) line is seen 97 'cut_handler', # coderef to call when a =cut line is seen 98 #Called like: 99 # $code_handler->($line, $self->{'line_count'}, $self) if $code_handler; 100 # $cut_handler->($line, $self->{'line_count'}, $self) if $cut_handler; 101 102 ); 103 104 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 105 106 sub any_errata_seen { # good for using as an exit() value... 107 return shift->{'errors_seen'} || 0; 108 } 109 110 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 111 # Pull in some functions that, for some reason, I expect to see here too: 112 BEGIN { 113 *pretty = \&Pod::Simple::BlackBox::pretty; 114 *stringify_lol = \&Pod::Simple::BlackBox::stringify_lol; 115 } 116 117 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 118 119 sub version_report { 120 my $class = ref($_[0]) || $_[0]; 121 if($class eq __PACKAGE__) { 122 return "$class $VERSION"; 123 } else { 124 my $v = $class->VERSION; 125 return "$class $v (" . __PACKAGE__ . " $VERSION)"; 126 } 127 } 128 129 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 130 131 #sub curr_open { # read-only list accessor 132 # return @{ $_[0]{'curr_open'} || return() }; 133 #} 134 #sub _curr_open_listref { $_[0]{'curr_open'} ||= [] } 135 136 137 sub output_string { 138 # Works by faking out output_fh. Simplifies our code. 139 # 140 my $this = shift; 141 return $this->{'output_string'} unless @_; # GET. 142 143 require Pod::Simple::TiedOutFH; 144 my $x = (defined($_[0]) and ref($_[0])) ? $_[0] : \( $_[0] ); 145 $$x = '' unless defined $$x; 146 DEBUG > 4 and print "# Output string set to $x ($$x)\n"; 147 $this->{'output_fh'} = Pod::Simple::TiedOutFH->handle_on($_[0]); 148 return 149 $this->{'output_string'} = $_[0]; 150 #${ ${ $this->{'output_fh'} } }; 151 } 152 153 sub abandon_output_string { $_[0]->abandon_output_fh; delete $_[0]{'output_string'} } 154 sub abandon_output_fh { $_[0]->output_fh(undef) } 155 # These don't delete the string or close the FH -- they just delete our 156 # references to it/them. 157 # TODO: document these 158 159 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 160 161 sub new { 162 # takes no parameters 163 my $class = ref($_[0]) || $_[0]; 164 #Carp::croak(__PACKAGE__ . " is a virtual base class -- see perldoc " 165 # . __PACKAGE__ ); 166 return bless { 167 'accept_codes' => { map( ($_=>$_), @Known_formatting_codes ) }, 168 'accept_directives' => { %Known_directives }, 169 'accept_targets' => {}, 170 }, $class; 171 } 172 173 174 175 # TODO: an option for whether to interpolate E<...>'s, or just resolve to codes. 176 177 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 178 179 sub _handle_element_start { # OVERRIDE IN DERIVED CLASS 180 my($self, $element_name, $attr_hash_r) = @_; 181 return; 182 } 183 184 sub _handle_element_end { # OVERRIDE IN DERIVED CLASS 185 my($self, $element_name) = @_; 186 return; 187 } 188 189 sub _handle_text { # OVERRIDE IN DERIVED CLASS 190 my($self, $text) = @_; 191 return; 192 } 193 194 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 195 # 196 # And now directives (not targets) 197 198 sub accept_directive_as_verbatim { shift->_accept_directives('Verbatim', @_) } 199 sub accept_directive_as_data { shift->_accept_directives('Data', @_) } 200 sub accept_directive_as_processed { shift->_accept_directives('Plain', @_) } 201 202 sub _accept_directives { 203 my($this, $type) = splice @_,0,2; 204 foreach my $d (@_) { 205 next unless defined $d and length $d; 206 Carp::croak "\"$d\" isn't a valid directive name" 207 unless $d =~ m/^[a-zA-Z][a-zA-Z0-9]*$/s; 208 Carp::croak "\"$d\" is already a reserved Pod directive name" 209 if exists $Known_directives{$d}; 210 $this->{'accept_directives'}{$d} = $type; 211 DEBUG > 2 and print "Learning to accept \"=$d\" as directive of type $type\n"; 212 } 213 DEBUG > 6 and print "$this\'s accept_directives : ", 214 pretty($this->{'accept_directives'}), "\n"; 215 216 return sort keys %{ $this->{'accept_directives'} } if wantarray; 217 return; 218 } 219 220 #-------------------------------------------------------------------------- 221 # TODO: document these: 222 223 sub unaccept_directive { shift->unaccept_directives(@_) }; 224 225 sub unaccept_directives { 226 my $this = shift; 227 foreach my $d (@_) { 228 next unless defined $d and length $d; 229 Carp::croak "\"$d\" isn't a valid directive name" 230 unless $d =~ m/^[a-zA-Z][a-zA-Z0-9]*$/s; 231 Carp::croak "But you must accept \"$d\" directives -- it's a builtin!" 232 if exists $Known_directives{$d}; 233 delete $this->{'accept_directives'}{$d}; 234 DEBUG > 2 and print "OK, won't accept \"=$d\" as directive.\n"; 235 } 236 return sort keys %{ $this->{'accept_directives'} } if wantarray; 237 return 238 } 239 240 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 241 # 242 # And now targets (not directives) 243 244 sub accept_target { shift->accept_targets(@_) } # alias 245 sub accept_target_as_text { shift->accept_targets_as_text(@_) } # alias 246 247 248 sub accept_targets { shift->_accept_targets('1', @_) } 249 250 sub accept_targets_as_text { shift->_accept_targets('force_resolve', @_) } 251 # forces them to be processed, even when there's no ":". 252 253 sub _accept_targets { 254 my($this, $type) = splice @_,0,2; 255 foreach my $t (@_) { 256 next unless defined $t and length $t; 257 # TODO: enforce some limitations on what a target name can be? 258 $this->{'accept_targets'}{$t} = $type; 259 DEBUG > 2 and print "Learning to accept \"$t\" as target of type $type\n"; 260 } 261 return sort keys %{ $this->{'accept_targets'} } if wantarray; 262 return; 263 } 264 265 #-------------------------------------------------------------------------- 266 sub unaccept_target { shift->unaccept_targets(@_) } 267 268 sub unaccept_targets { 269 my $this = shift; 270 foreach my $t (@_) { 271 next unless defined $t and length $t; 272 # TODO: enforce some limitations on what a target name can be? 273 delete $this->{'accept_targets'}{$t}; 274 DEBUG > 2 and print "OK, won't accept \"$t\" as target.\n"; 275 } 276 return sort keys %{ $this->{'accept_targets'} } if wantarray; 277 return; 278 } 279 280 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 281 # 282 # And now codes (not targets or directives) 283 284 sub accept_code { shift->accept_codes(@_) } # alias 285 286 sub accept_codes { # Add some codes 287 my $this = shift; 288 289 foreach my $new_code (@_) { 290 next unless defined $new_code and length $new_code; 291 if(ASCII) { 292 # A good-enough check that it's good as an XML Name symbol: 293 Carp::croak "\"$new_code\" isn't a valid element name" 294 if $new_code =~ 295 m/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/ 296 # Characters under 0x80 that aren't legal in an XML Name. 297 or $new_code =~ m/^[-\.0-9]/s 298 or $new_code =~ m/:[-\.0-9]/s; 299 # The legal under-0x80 Name characters that 300 # an XML Name still can't start with. 301 } 302 303 $this->{'accept_codes'}{$new_code} = $new_code; 304 305 # Yes, map to itself -- just so that when we 306 # see "=extend W [whatever] thatelementname", we say that W maps 307 # to whatever $this->{accept_codes}{thatelementname} is, 308 # i.e., "thatelementname". Then when we go re-mapping, 309 # a "W" in the treelet turns into "thatelementname". We only 310 # remap once. 311 # If we say we accept "W", then a "W" in the treelet simply turns 312 # into "W". 313 } 314 315 return; 316 } 317 318 #-------------------------------------------------------------------------- 319 sub unaccept_code { shift->unaccept_codes(@_) } 320 321 sub unaccept_codes { # remove some codes 322 my $this = shift; 323 324 foreach my $new_code (@_) { 325 next unless defined $new_code and length $new_code; 326 if(ASCII) { 327 # A good-enough check that it's good as an XML Name symbol: 328 Carp::croak "\"$new_code\" isn't a valid element name" 329 if $new_code =~ 330 m/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/ 331 # Characters under 0x80 that aren't legal in an XML Name. 332 or $new_code =~ m/^[-\.0-9]/s 333 or $new_code =~ m/:[-\.0-9]/s; 334 # The legal under-0x80 Name characters that 335 # an XML Name still can't start with. 336 } 337 338 Carp::croak "But you must accept \"$new_code\" codes -- it's a builtin!" 339 if grep $new_code eq $_, @Known_formatting_codes; 340 341 delete $this->{'accept_codes'}{$new_code}; 342 343 DEBUG > 2 and print "OK, won't accept the code $new_code<...>.\n"; 344 } 345 346 return; 347 } 348 349 350 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 351 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 352 353 sub parse_string_document { 354 my $self = shift; 355 my @lines; 356 foreach my $line_group (@_) { 357 next unless defined $line_group and length $line_group; 358 pos($line_group) = 0; 359 while($line_group =~ 360 m/([^\n\r]*)((?:\r?\n)?)/g 361 ) { 362 #print(">> $1\n"), 363 $self->parse_lines($1) 364 if length($1) or length($2) 365 or pos($line_group) != length($line_group); 366 # I.e., unless it's a zero-length "empty line" at the very 367 # end of "foo\nbar\n" (i.e., between the \n and the EOS). 368 } 369 } 370 $self->parse_lines(undef); # to signal EOF 371 return $self; 372 } 373 374 sub _init_fh_source { 375 my($self, $source) = @_; 376 377 #DEBUG > 1 and print "Declaring $source as :raw for starters\n"; 378 #$self->_apply_binmode($source, ':raw'); 379 #binmode($source, ":raw"); 380 381 return; 382 } 383 384 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:. 385 # 386 387 sub parse_file { 388 my($self, $source) = (@_); 389 390 if(!defined $source) { 391 Carp::croak("Can't use empty-string as a source for parse_file"); 392 } elsif(ref(\$source) eq 'GLOB') { 393 $self->{'source_filename'} = '' . ($source); 394 } elsif(ref $source) { 395 $self->{'source_filename'} = '' . ($source); 396 } elsif(!length $source) { 397 Carp::croak("Can't use empty-string as a source for parse_file"); 398 } else { 399 { 400 local *PODSOURCE; 401 open(PODSOURCE, "<$source") || Carp::croak("Can't open $source: $!"); 402 $self->{'source_filename'} = $source; 403 $source = *PODSOURCE{IO}; 404 } 405 $self->_init_fh_source($source); 406 } 407 # By here, $source is a FH. 408 409 $self->{'source_fh'} = $source; 410 411 my($i, @lines); 412 until( $self->{'source_dead'} ) { 413 splice @lines; 414 for($i = MANY_LINES; $i--;) { # read those many lines at a time 415 local $/ = $NL; 416 push @lines, scalar(<$source>); # readline 417 last unless defined $lines[-1]; 418 # but pass thru the undef, which will set source_dead to true 419 } 420 $self->parse_lines(@lines); 421 } 422 delete($self->{'source_fh'}); # so it can be GC'd 423 return $self; 424 } 425 426 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:. 427 428 sub parse_from_file { 429 # An emulation of Pod::Parser's interface, for the sake of Perldoc. 430 # Basically just a wrapper around parse_file. 431 432 my($self, $source, $to) = @_; 433 $self = $self->new unless ref($self); # so we tolerate being a class method 434 435 if(!defined $source) { $source = *STDIN{IO} 436 } elsif(ref(\$source) eq 'GLOB') { # stet 437 } elsif(ref($source) ) { # stet 438 } elsif(!length $source 439 or $source eq '-' or $source =~ m/^<&(STDIN|0)$/i 440 ) { 441 $source = *STDIN{IO}; 442 } 443 444 if(!defined $to) { $self->output_fh( *STDOUT{IO} ); 445 } elsif(ref(\$to) eq 'GLOB') { $self->output_fh( $to ); 446 } elsif(ref($to)) { $self->output_fh( $to ); 447 } elsif(!length $to 448 or $to eq '-' or $to =~ m/^>&?(?:STDOUT|1)$/i 449 ) { 450 $self->output_fh( *STDOUT{IO} ); 451 } else { 452 require Symbol; 453 my $out_fh = Symbol::gensym(); 454 DEBUG and print "Write-opening to $to\n"; 455 open($out_fh, ">$to") or Carp::croak "Can't write-open $to: $!"; 456 binmode($out_fh) 457 if $self->can('write_with_binmode') and $self->write_with_binmode; 458 $self->output_fh($out_fh); 459 } 460 461 return $self->parse_file($source); 462 } 463 464 #----------------------------------------------------------------------------- 465 466 sub whine { 467 #my($self,$line,$complaint) = @_; 468 my $self = shift(@_); 469 ++$self->{'errors_seen'}; 470 if($self->{'no_whining'}) { 471 DEBUG > 9 and print "Discarding complaint (at line $_[0]) $_[1]\n because no_whining is on.\n"; 472 return; 473 } 474 return $self->_complain_warn(@_) if $self->{'complain_stderr'}; 475 return $self->_complain_errata(@_); 476 } 477 478 sub scream { # like whine, but not suppressable 479 #my($self,$line,$complaint) = @_; 480 my $self = shift(@_); 481 ++$self->{'errors_seen'}; 482 return $self->_complain_warn(@_) if $self->{'complain_stderr'}; 483 return $self->_complain_errata(@_); 484 } 485 486 sub _complain_warn { 487 my($self,$line,$complaint) = @_; 488 return printf STDERR "%s around line %s: %s\n", 489 $self->{'source_filename'} || 'Pod input', $line, $complaint; 490 } 491 492 sub _complain_errata { 493 my($self,$line,$complaint) = @_; 494 if( $self->{'no_errata_section'} ) { 495 DEBUG > 9 and print "Discarding erratum (at line $line) $complaint\n because no_errata_section is on.\n"; 496 } else { 497 DEBUG > 9 and print "Queuing erratum (at line $line) $complaint\n"; 498 push @{$self->{'errata'}{$line}}, $complaint 499 # for a report to be generated later! 500 } 501 return 1; 502 } 503 504 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 505 506 sub _get_initial_item_type { 507 # A hack-wrapper here for when you have like "=over\n\n=item 456\n\n" 508 my($self, $para) = @_; 509 return $para->[1]{'~type'} if $para->[1]{'~type'}; 510 511 return $para->[1]{'~type'} = 'text' 512 if join("\n", @{$para}[2 .. $#$para]) =~ m/^\s*(\d+)\.?\s*$/s and $1 ne '1'; 513 # Else fall thru to the general case: 514 return $self->_get_item_type($para); 515 } 516 517 518 519 sub _get_item_type { # mutates the item!! 520 my($self, $para) = @_; 521 return $para->[1]{'~type'} if $para->[1]{'~type'}; 522 523 524 # Otherwise we haven't yet been to this node. Maybe alter it... 525 526 my $content = join "\n", @{$para}[2 .. $#$para]; 527 528 if($content =~ m/^\s*\*\s*$/s or $content =~ m/^\s*$/s) { 529 # Like: "=item *", "=item * ", "=item" 530 splice @$para, 2; # so it ends up just being ['=item', { attrhash } ] 531 $para->[1]{'~orig_content'} = $content; 532 return $para->[1]{'~type'} = 'bullet'; 533 534 } elsif($content =~ m/^\s*\*\s+(.+)/s) { # tolerance 535 536 # Like: "=item * Foo bar baz"; 537 $para->[1]{'~orig_content'} = $content; 538 $para->[1]{'~_freaky_para_hack'} = $1; 539 DEBUG > 2 and print " Tolerating $$para[2] as =item *\\n\\n$1\n"; 540 splice @$para, 2; # so it ends up just being ['=item', { attrhash } ] 541 return $para->[1]{'~type'} = 'bullet'; 542 543 } elsif($content =~ m/^\s*(\d+)\.?\s*$/s) { 544 # Like: "=item 1.", "=item 123412" 545 546 $para->[1]{'~orig_content'} = $content; 547 $para->[1]{'number'} = $1; # Yes, stores the number there! 548 549 splice @$para, 2; # so it ends up just being ['=item', { attrhash } ] 550 return $para->[1]{'~type'} = 'number'; 551 552 } else { 553 # It's anything else. 554 return $para->[1]{'~type'} = 'text'; 555 556 } 557 } 558 559 #----------------------------------------------------------------------------- 560 561 sub _make_treelet { 562 my $self = shift; # and ($para, $start_line) 563 my $treelet; 564 if(!@_) { 565 return ['']; 566 } if(ref $_[0] and ref $_[0][0] and $_[0][0][0] eq '~Top') { 567 # Hack so we can pass in fake-o pre-cooked paragraphs: 568 # just have the first line be a reference to a ['~Top', {}, ...] 569 # We use this feechure in gen_errata and stuff. 570 571 DEBUG and print "Applying precooked treelet hack to $_[0][0]\n"; 572 $treelet = $_[0][0]; 573 splice @$treelet, 0, 2; # lop the top off 574 return $treelet; 575 } else { 576 $treelet = $self->_treelet_from_formatting_codes(@_); 577 } 578 579 if( $self->_remap_sequences($treelet) ) { 580 $self->_treat_Zs($treelet); # Might as well nix these first 581 $self->_treat_Ls($treelet); # L has to precede E and S 582 $self->_treat_Es($treelet); 583 $self->_treat_Ss($treelet); # S has to come after E 584 585 $self->_wrap_up($treelet); # Nix X's and merge texties 586 587 } else { 588 DEBUG and print "Formatless treelet gets fast-tracked.\n"; 589 # Very common case! 590 } 591 592 splice @$treelet, 0, 2; # lop the top off 593 594 return $treelet; 595 } 596 597 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:. 598 599 sub _wrap_up { 600 my($self, @stack) = @_; 601 my $nixx = $self->{'nix_X_codes'}; 602 my $merge = $self->{'merge_text' }; 603 return unless $nixx or $merge; 604 605 DEBUG > 2 and print "\nStarting _wrap_up traversal.\n", 606 $merge ? (" Merge mode on\n") : (), 607 $nixx ? (" Nix-X mode on\n") : (), 608 ; 609 610 611 my($i, $treelet); 612 while($treelet = shift @stack) { 613 DEBUG > 3 and print " Considering children of this $treelet->[0] node...\n"; 614 for($i = 2; $i < @$treelet; ++$i) { # iterate over children 615 DEBUG > 3 and print " Considering child at $i ", pretty($treelet->[$i]), "\n"; 616 if($nixx and ref $treelet->[$i] and $treelet->[$i][0] eq 'X') { 617 DEBUG > 3 and print " Nixing X node at $i\n"; 618 splice(@$treelet, $i, 1); # just nix this node (and its descendants) 619 # no need to back-update the counter just yet 620 redo; 621 622 } elsif($merge and $i != 2 and # non-initial 623 !ref $treelet->[$i] and !ref $treelet->[$i - 1] 624 ) { 625 DEBUG > 3 and print " Merging ", $i-1, 626 ":[$treelet->[$i-1]] and $i\:[$treelet->[$i]]\n"; 627 $treelet->[$i-1] .= ( splice(@$treelet, $i, 1) )[0]; 628 DEBUG > 4 and print " Now: ", $i-1, ":[$treelet->[$i-1]]\n"; 629 --$i; 630 next; 631 # since we just pulled the possibly last node out from under 632 # ourselves, we can't just redo() 633 634 } elsif( ref $treelet->[$i] ) { 635 DEBUG > 4 and print " Enqueuing ", pretty($treelet->[$i]), " for traversal.\n"; 636 push @stack, $treelet->[$i]; 637 638 if($treelet->[$i][0] eq 'L') { 639 my $thing; 640 foreach my $attrname ('section', 'to') { 641 if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) { 642 unshift @stack, $thing; 643 DEBUG > 4 and print " +Enqueuing ", 644 pretty( $treelet->[$i][1]{$attrname} ), 645 " as an attribute value to tweak.\n"; 646 } 647 } 648 } 649 } 650 } 651 } 652 DEBUG > 2 and print "End of _wrap_up traversal.\n\n"; 653 654 return; 655 } 656 657 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:. 658 659 sub _remap_sequences { 660 my($self,@stack) = @_; 661 662 if(@stack == 1 and @{ $stack[0] } == 3 and !ref $stack[0][2]) { 663 # VERY common case: abort it. 664 DEBUG and print "Skipping _remap_sequences: formatless treelet.\n"; 665 return 0; 666 } 667 668 my $map = ($self->{'accept_codes'} || die "NO accept_codes in $self?!?"); 669 670 my $start_line = $stack[0][1]{'start_line'}; 671 DEBUG > 2 and printf 672 "\nAbout to start _remap_sequences on treelet from line %s.\n", 673 $start_line || '[?]' 674 ; 675 DEBUG > 3 and print " Map: ", 676 join('; ', map "$_=" . ( 677 ref($map->{$_}) ? join(",", @{$map->{$_}}) : $map->{$_} 678 ), 679 sort keys %$map ), 680 ("B~C~E~F~I~L~S~X~Z" eq join '~', sort keys %$map) 681 ? " (all normal)\n" : "\n" 682 ; 683 684 # A recursive algorithm implemented iteratively! Whee! 685 686 my($is, $was, $i, $treelet); # scratch 687 while($treelet = shift @stack) { 688 DEBUG > 3 and print " Considering children of this $treelet->[0] node...\n"; 689 for($i = 2; $i < @$treelet; ++$i) { # iterate over children 690 next unless ref $treelet->[$i]; # text nodes are uninteresting 691 692 DEBUG > 4 and print " Noting child $i : $treelet->[$i][0]<...>\n"; 693 694 $is = $treelet->[$i][0] = $map->{ $was = $treelet->[$i][0] }; 695 if( DEBUG > 3 ) { 696 if(!defined $is) { 697 print " Code $was<> is UNKNOWN!\n"; 698 } elsif($is eq $was) { 699 DEBUG > 4 and print " Code $was<> stays the same.\n"; 700 } else { 701 print " Code $was<> maps to ", 702 ref($is) 703 ? ( "tags ", map("$_<", @$is), '...', map('>', @$is), "\n" ) 704 : "tag $is<...>.\n"; 705 } 706 } 707 708 if(!defined $is) { 709 $self->whine($start_line, "Deleting unknown formatting code $was<>"); 710 $is = $treelet->[$i][0] = '1'; # But saving the children! 711 # I could also insert a leading "$was<" and tailing ">" as 712 # children of this node, but something about that seems icky. 713 } 714 if(ref $is) { 715 my @dynasty = @$is; 716 DEBUG > 4 and print " Renaming $was node to $dynasty[-1]\n"; 717 $treelet->[$i][0] = pop @dynasty; 718 my $nugget; 719 while(@dynasty) { 720 DEBUG > 4 and printf 721 " Grafting a new %s node between %s and %s\n", 722 $dynasty[-1], $treelet->[0], $treelet->[$i][0], 723 ; 724 725 #$nugget = ; 726 splice @$treelet, $i, 1, [pop(@dynasty), {}, $treelet->[$i]]; 727 # relace node with a new parent 728 } 729 } elsif($is eq '0') { 730 splice(@$treelet, $i, 1); # just nix this node (and its descendants) 731 --$i; # back-update the counter 732 } elsif($is eq '1') { 733 splice(@$treelet, $i, 1 # replace this node with its children! 734 => splice @{ $treelet->[$i] },2 735 # (not catching its first two (non-child) items) 736 ); 737 --$i; # back up for new stuff 738 } else { 739 # otherwise it's unremarkable 740 unshift @stack, $treelet->[$i]; # just recurse 741 } 742 } 743 } 744 745 DEBUG > 2 and print "End of _remap_sequences traversal.\n\n"; 746 747 if(@_ == 2 and @{ $_[1] } == 3 and !ref $_[1][2]) { 748 DEBUG and print "Noting that the treelet is now formatless.\n"; 749 return 0; 750 } 751 return 1; 752 } 753 754 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 755 756 sub _ponder_extend { 757 758 # "Go to an extreme, move back to a more comfortable place" 759 # -- /Oblique Strategies/, Brian Eno and Peter Schmidt 760 761 my($self, $para) = @_; 762 my $content = join ' ', splice @$para, 2; 763 $content =~ s/^\s+//s; 764 $content =~ s/\s+$//s; 765 766 DEBUG > 2 and print "Ogling extensor: =extend $content\n"; 767 768 if($content =~ 769 m/^ 770 (\S+) # 1 : new item 771 \s+ 772 (\S+) # 2 : fallback(s) 773 (?:\s+(\S+))? # 3 : element name(s) 774 \s* 775 $ 776 /xs 777 ) { 778 my $new_letter = $1; 779 my $fallbacks_one = $2; 780 my $elements_one; 781 $elements_one = defined($3) ? $3 : $1; 782 783 DEBUG > 2 and print "Extensor has good syntax.\n"; 784 785 unless($new_letter =~ m/^[A-Z]$/s or $new_letter) { 786 DEBUG > 2 and print " $new_letter isn't a valid thing to entend.\n"; 787 $self->whine( 788 $para->[1]{'start_line'}, 789 "You can extend only formatting codes A-Z, not like \"$new_letter\"" 790 ); 791 return; 792 } 793 794 if(grep $new_letter eq $_, @Known_formatting_codes) { 795 DEBUG > 2 and print " $new_letter isn't a good thing to extend, because known.\n"; 796 $self->whine( 797 $para->[1]{'start_line'}, 798 "You can't extend an established code like \"$new_letter\"" 799 ); 800 801 #TODO: or allow if last bit is same? 802 803 return; 804 } 805 806 unless($fallbacks_one =~ m/^[A-Z](,[A-Z])*$/s # like "B", "M,I", etc. 807 or $fallbacks_one eq '0' or $fallbacks_one eq '1' 808 ) { 809 $self->whine( 810 $para->[1]{'start_line'}, 811 "Format for second =extend parameter must be like" 812 . " M or 1 or 0 or M,N or M,N,O but you have it like " 813 . $fallbacks_one 814 ); 815 return; 816 } 817 818 unless($elements_one =~ m/^[^ ,]+(,[^ ,]+)*$/s) { # like "B", "M,I", etc. 819 $self->whine( 820 $para->[1]{'start_line'}, 821 "Format for third =extend parameter: like foo or bar,Baz,qu:ux but not like " 822 . $elements_one 823 ); 824 return; 825 } 826 827 my @fallbacks = split ',', $fallbacks_one, -1; 828 my @elements = split ',', $elements_one, -1; 829 830 foreach my $f (@fallbacks) { 831 next if exists $Known_formatting_codes{$f} or $f eq '0' or $f eq '1'; 832 DEBUG > 2 and print " Can't fall back on unknown code $f\n"; 833 $self->whine( 834 $para->[1]{'start_line'}, 835 "Can't use unknown formatting code '$f' as a fallback for '$new_letter'" 836 ); 837 return; 838 } 839 840 DEBUG > 3 and printf "Extensor: Fallbacks <%s> Elements <%s>.\n", 841 @fallbacks, @elements; 842 843 my $canonical_form; 844 foreach my $e (@elements) { 845 if(exists $self->{'accept_codes'}{$e}) { 846 DEBUG > 1 and print " Mapping '$new_letter' to known extension '$e'\n"; 847 $canonical_form = $e; 848 last; # first acceptable elementname wins! 849 } else { 850 DEBUG > 1 and print " Can't map '$new_letter' to unknown extension '$e'\n"; 851 } 852 } 853 854 855 if( defined $canonical_form ) { 856 # We found a good N => elementname mapping 857 $self->{'accept_codes'}{$new_letter} = $canonical_form; 858 DEBUG > 2 and print 859 "Extensor maps $new_letter => known element $canonical_form.\n"; 860 } else { 861 # We have to use the fallback(s), which might be '0', or '1'. 862 $self->{'accept_codes'}{$new_letter} 863 = (@fallbacks == 1) ? $fallbacks[0] : \@fallbacks; 864 DEBUG > 2 and print 865 "Extensor maps $new_letter => fallbacks @fallbacks.\n"; 866 } 867 868 } else { 869 DEBUG > 2 and print "Extensor has bad syntax.\n"; 870 $self->whine( 871 $para->[1]{'start_line'}, 872 "Unknown =extend syntax: $content" 873 ) 874 } 875 return; 876 } 877 878 879 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:. 880 881 sub _treat_Zs { # Nix Z<...>'s 882 my($self,@stack) = @_; 883 884 my($i, $treelet); 885 my $start_line = $stack[0][1]{'start_line'}; 886 887 # A recursive algorithm implemented iteratively! Whee! 888 889 while($treelet = shift @stack) { 890 for($i = 2; $i < @$treelet; ++$i) { # iterate over children 891 next unless ref $treelet->[$i]; # text nodes are uninteresting 892 unless($treelet->[$i][0] eq 'Z') { 893 unshift @stack, $treelet->[$i]; # recurse 894 next; 895 } 896 897 DEBUG > 1 and print "Nixing Z node @{$treelet->[$i]}\n"; 898 899 # bitch UNLESS it's empty 900 unless( @{$treelet->[$i]} == 2 901 or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '') 902 ) { 903 $self->whine( $start_line, "A non-empty Z<>" ); 904 } # but kill it anyway 905 906 splice(@$treelet, $i, 1); # thereby just nix this node. 907 --$i; 908 909 } 910 } 911 912 return; 913 } 914 915 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 916 917 # Quoting perlpodspec: 918 919 # In parsing an L<...> code, Pod parsers must distinguish at least four 920 # attributes: 921 922 ############# Not used. Expressed via the element children plus 923 ############# the value of the "content-implicit" flag. 924 # First: 925 # The link-text. If there is none, this must be undef. (E.g., in "L<Perl 926 # Functions|perlfunc>", the link-text is "Perl Functions". In 927 # "L<Time::HiRes>" and even "L<|Time::HiRes>", there is no link text. Note 928 # that link text may contain formatting.) 929 # 930 931 ############# The element children 932 # Second: 933 # The possibly inferred link-text -- i.e., if there was no real link text, 934 # then this is the text that we'll infer in its place. (E.g., for 935 # "L<Getopt::Std>", the inferred link text is "Getopt::Std".) 936 # 937 938 ############# The "to" attribute (which might be text, or a treelet) 939 # Third: 940 # The name or URL, or undef if none. (E.g., in "L<Perl 941 # Functions|perlfunc>", the name -- also sometimes called the page -- is 942 # "perlfunc". In "L</CAVEATS>", the name is undef.) 943 # 944 945 ############# The "section" attribute (which might be next, or a treelet) 946 # Fourth: 947 # The section (AKA "item" in older perlpods), or undef if none. E.g., in 948 # Getopt::Std/DESCRIPTION, "DESCRIPTION" is the section. (Note that this 949 # is not the same as a manpage section like the "5" in "man 5 crontab". 950 # "Section Foo" in the Pod sense means the part of the text that's 951 # introduced by the heading or item whose text is "Foo".) 952 # 953 # Pod parsers may also note additional attributes including: 954 # 955 956 ############# The "type" attribute. 957 # Fifth: 958 # A flag for whether item 3 (if present) is a URL (like 959 # "http://lists.perl.org" is), in which case there should be no section 960 # attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or 961 # possibly a man page name (like "crontab(5)" is). 962 # 963 964 ############# Not implemented, I guess. 965 # Sixth: 966 # The raw original L<...> content, before text is split on "|", "/", etc, 967 # and before E<...> codes are expanded. 968 969 970 # For L<...> codes without a "name|" part, only E<...> and Z<> codes may 971 # occur -- no other formatting codes. That is, authors should not use 972 # "L<B<Foo::Bar>>". 973 # 974 # Note, however, that formatting codes and Z<>'s can occur in any and all 975 # parts of an L<...> (i.e., in name, section, text, and url). 976 977 sub _treat_Ls { # Process our dear dear friends, the L<...> sequences 978 979 # L<name> 980 # L<name/"sec"> or L<name/sec> 981 # L</"sec"> or L</sec> or L<"sec"> 982 # L<text|name> 983 # L<text|name/"sec"> or L<text|name/sec> 984 # L<text|/"sec"> or L<text|/sec> or L<text|"sec"> 985 # L<scheme:...> 986 987 my($self,@stack) = @_; 988 989 my($i, $treelet); 990 my $start_line = $stack[0][1]{'start_line'}; 991 992 # A recursive algorithm implemented iteratively! Whee! 993 994 while($treelet = shift @stack) { 995 for(my $i = 2; $i < @$treelet; ++$i) { 996 # iterate over children of current tree node 997 next unless ref $treelet->[$i]; # text nodes are uninteresting 998 unless($treelet->[$i][0] eq 'L') { 999 unshift @stack, $treelet->[$i]; # recurse 1000 next; 1001 } 1002 1003 1004 # By here, $treelet->[$i] is definitely an L node 1005 DEBUG > 1 and print "Ogling L node $treelet->[$i]\n"; 1006 1007 # bitch if it's empty 1008 if( @{$treelet->[$i]} == 2 1009 or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '') 1010 ) { 1011 $self->whine( $start_line, "An empty L<>" ); 1012 $treelet->[$i] = 'L<>'; # just make it a text node 1013 next; # and move on 1014 } 1015 1016 # Catch URLs: 1017 # URLs can, alas, contain E<...> sequences, so we can't /assume/ 1018 # that this is one text node. But it has to START with one text 1019 # node... 1020 if(! ref $treelet->[$i][2] and 1021 $treelet->[$i][2] =~ m/^\w+:[^:\s]\S*$/s 1022 ) { 1023 $treelet->[$i][1]{'type'} = 'url'; 1024 $treelet->[$i][1]{'content-implicit'} = 'yes'; 1025 1026 # TODO: deal with rel: URLs here? 1027 1028 if( 3 == @{ $treelet->[$i] } ) { 1029 # But if it IS just one text node (most common case) 1030 DEBUG > 1 and printf qq{Catching "%s as " as ho-hum L<URL> link.\n}, 1031 $treelet->[$i][2] 1032 ; 1033 $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new( 1034 $treelet->[$i][2] 1035 ); # its own treelet 1036 } else { 1037 # It's a URL but complex (like "L<foo:bazE<123>bar>"). Feh. 1038 #$treelet->[$i][1]{'to'} = [ @{$treelet->[$i]} ]; 1039 #splice @{ $treelet->[$i][1]{'to'} }, 0,2; 1040 #DEBUG > 1 and printf qq{Catching "%s as " as complex L<URL> link.\n}, 1041 # join '~', @{$treelet->[$i][1]{'to' }}; 1042 1043 $treelet->[$i][1]{'to'} = Pod::Simple::LinkSection->new( 1044 $treelet->[$i] # yes, clone the whole content as a treelet 1045 ); 1046 $treelet->[$i][1]{'to'}[0] = ''; # set the copy's tagname to nil 1047 die "SANITY FAILURE" if $treelet->[0] eq ''; # should never happen! 1048 DEBUG > 1 and print 1049 qq{Catching "$treelet->[$i][1]{'to'}" as a complex L<URL> link.\n}; 1050 } 1051 1052 next; # and move on 1053 } 1054 1055 1056 # Catch some very simple and/or common cases 1057 if(@{$treelet->[$i]} == 3 and ! ref $treelet->[$i][2]) { 1058 my $it = $treelet->[$i][2]; 1059 if($it =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s) { # man sections 1060 # Hopefully neither too broad nor too restrictive a RE 1061 DEBUG > 1 and print "Catching \"$it\" as manpage link.\n"; 1062 $treelet->[$i][1]{'type'} = 'man'; 1063 # This's the only place where man links can get made. 1064 $treelet->[$i][1]{'content-implicit'} = 'yes'; 1065 $treelet->[$i][1]{'to' } = 1066 Pod::Simple::LinkSection->new( $it ); # treelet! 1067 1068 next; 1069 } 1070 if($it =~ m/^[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+(\:\:[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+)*$/s) { 1071 # Extremely forgiving idea of what constitutes a bare 1072 # modulename link like L<Foo::Bar> or even L<Thing::1.0::Docs::Tralala> 1073 DEBUG > 1 and print "Catching \"$it\" as ho-hum L<Modulename> link.\n"; 1074 $treelet->[$i][1]{'type'} = 'pod'; 1075 $treelet->[$i][1]{'content-implicit'} = 'yes'; 1076 $treelet->[$i][1]{'to' } = 1077 Pod::Simple::LinkSection->new( $it ); # treelet! 1078 next; 1079 } 1080 # else fall thru... 1081 } 1082 1083 1084 1085 # ...Uhoh, here's the real L<...> parsing stuff... 1086 # "With the ill behavior, with the ill behavior, with the ill behavior..." 1087 1088 DEBUG > 1 and print "Running a real parse on this non-trivial L\n"; 1089 1090 1091 my $link_text; # set to an arrayref if found 1092 my $ell = $treelet->[$i]; 1093 my @ell_content = @$ell; 1094 splice @ell_content,0,2; # Knock off the 'L' and {} bits 1095 1096 DEBUG > 3 and print " Ell content to start: ", 1097 pretty(@ell_content), "\n"; 1098 1099 1100 # Look for the "|" -- only in CHILDREN (not all underlings!) 1101 # Like L<I like the strictness|strict> 1102 DEBUG > 3 and 1103 print " Peering at L content for a '|' ...\n"; 1104 for(my $j = 0; $j < @ell_content; ++$j) { 1105 next if ref $ell_content[$j]; 1106 DEBUG > 3 and 1107 print " Peering at L-content text bit \"$ell_content[$j]\" for a '|'.\n"; 1108 1109 if($ell_content[$j] =~ m/^([^\|]*)\|(.*)$/s) { 1110 my @link_text = ($1); # might be 0-length 1111 $ell_content[$j] = $2; # might be 0-length 1112 1113 DEBUG > 3 and 1114 print " FOUND a '|' in it. Splitting into [$1] + [$2]\n"; 1115 1116 unshift @link_text, splice @ell_content, 0, $j; 1117 # leaving only things at J and after 1118 @ell_content = grep ref($_)||length($_), @ell_content ; 1119 $link_text = [grep ref($_)||length($_), @link_text ]; 1120 DEBUG > 3 and printf 1121 " So link text is %s\n and remaining ell content is %s\n", 1122 pretty($link_text), pretty(@ell_content); 1123 last; 1124 } 1125 } 1126 1127 1128 # Now look for the "/" -- only in CHILDREN (not all underlings!) 1129 # And afterward, anything left in @ell_content will be the raw name 1130 # Like L<Foo::Bar/Object Methods> 1131 my $section_name; # set to arrayref if found 1132 DEBUG > 3 and print " Peering at L-content for a '/' ...\n"; 1133 for(my $j = 0; $j < @ell_content; ++$j) { 1134 next if ref $ell_content[$j]; 1135 DEBUG > 3 and 1136 print " Peering at L-content text bit \"$ell_content[$j]\" for a '/'.\n"; 1137 1138 if($ell_content[$j] =~ m/^([^\/]*)\/(.*)$/s) { 1139 my @section_name = ($2); # might be 0-length 1140 $ell_content[$j] = $1; # might be 0-length 1141 1142 DEBUG > 3 and 1143 print " FOUND a '/' in it.", 1144 " Splitting to page [...$1] + section [$2...]\n"; 1145 1146 push @section_name, splice @ell_content, 1+$j; 1147 # leaving only things before and including J 1148 1149 @ell_content = grep ref($_)||length($_), @ell_content ; 1150 @section_name = grep ref($_)||length($_), @section_name ; 1151 1152 # Turn L<.../"foo"> into L<.../foo> 1153 if(@section_name 1154 and !ref($section_name[0]) and !ref($section_name[-1]) 1155 and $section_name[ 0] =~ m/^\"/s 1156 and $section_name[-1] =~ m/\"$/s 1157 and !( # catch weird degenerate case of L<"> ! 1158 @section_name == 1 and $section_name[0] eq '"' 1159 ) 1160 ) { 1161 $section_name[ 0] =~ s/^\"//s; 1162 $section_name[-1] =~ s/\"$//s; 1163 DEBUG > 3 and 1164 print " Quotes removed: ", pretty(@section_name), "\n"; 1165 } else { 1166 DEBUG > 3 and 1167 print " No need to remove quotes in ", pretty(@section_name), "\n"; 1168 } 1169 1170 $section_name = \@section_name; 1171 last; 1172 } 1173 } 1174 1175 # Turn L<"Foo Bar"> into L</Foo Bar> 1176 if(!$section_name and @ell_content 1177 and !ref($ell_content[0]) and !ref($ell_content[-1]) 1178 and $ell_content[ 0] =~ m/^\"/s 1179 and $ell_content[-1] =~ m/\"$/s 1180 and !( # catch weird degenerate case of L<"> ! 1181 @ell_content == 1 and $ell_content[0] eq '"' 1182 ) 1183 ) { 1184 $section_name = [splice @ell_content]; 1185 $section_name->[ 0] =~ s/^\"//s; 1186 $section_name->[-1] =~ s/\"$//s; 1187 } 1188 1189 # Turn L<Foo Bar> into L</Foo Bar>. 1190 if(!$section_name and !$link_text and @ell_content 1191 and grep !ref($_) && m/ /s, @ell_content 1192 ) { 1193 $section_name = [splice @ell_content]; 1194 # That's support for the now-deprecated syntax. 1195 # (Maybe generate a warning eventually?) 1196 # Note that it deliberately won't work on L<...|Foo Bar> 1197 } 1198 1199 1200 # Now make up the link_text 1201 # L<Foo> -> L<Foo|Foo> 1202 # L</Bar> -> L<"Bar"|Bar> 1203 # L<Foo/Bar> -> L<"Bar" in Foo/Foo> 1204 unless($link_text) { 1205 $ell->[1]{'content-implicit'} = 'yes'; 1206 $link_text = []; 1207 push @$link_text, '"', @$section_name, '"' if $section_name; 1208 1209 if(@ell_content) { 1210 $link_text->[-1] .= ' in ' if $section_name; 1211 push @$link_text, @ell_content; 1212 } 1213 } 1214 1215 1216 # And the E resolver will have to deal with all our treeletty things: 1217 1218 if(@ell_content == 1 and !ref($ell_content[0]) 1219 and $ell_content[0] =~ m/^[-a-zA-Z0-9]+\([-a-zA-Z0-9]+\)$/s 1220 ) { 1221 $ell->[1]{'type'} = 'man'; 1222 DEBUG > 3 and print "Considering this ($ell_content[0]) a man link.\n"; 1223 } else { 1224 $ell->[1]{'type'} = 'pod'; 1225 DEBUG > 3 and print "Considering this a pod link (not man or url).\n"; 1226 } 1227 1228 if( defined $section_name ) { 1229 $ell->[1]{'section'} = Pod::Simple::LinkSection->new( 1230 ['', {}, @$section_name] 1231 ); 1232 DEBUG > 3 and print "L-section content: ", pretty($ell->[1]{'section'}), "\n"; 1233 } 1234 1235 if( @ell_content ) { 1236 $ell->[1]{'to'} = Pod::Simple::LinkSection->new( 1237 ['', {}, @ell_content] 1238 ); 1239 DEBUG > 3 and print "L-to content: ", pretty($ell->[1]{'to'}), "\n"; 1240 } 1241 1242 # And update children to be the link-text: 1243 @$ell = (@$ell[0,1], defined($link_text) ? splice(@$link_text) : ''); 1244 1245 DEBUG > 2 and print "End of L-parsing for this node $treelet->[$i]\n"; 1246 1247 unshift @stack, $treelet->[$i]; # might as well recurse 1248 } 1249 } 1250 1251 return; 1252 } 1253 1254 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1255 1256 sub _treat_Es { 1257 my($self,@stack) = @_; 1258 1259 my($i, $treelet, $content, $replacer, $charnum); 1260 my $start_line = $stack[0][1]{'start_line'}; 1261 1262 # A recursive algorithm implemented iteratively! Whee! 1263 1264 1265 # Has frightening side effects on L nodes' attributes. 1266 1267 #my @ells_to_tweak; 1268 1269 while($treelet = shift @stack) { 1270 for(my $i = 2; $i < @$treelet; ++$i) { # iterate over children 1271 next unless ref $treelet->[$i]; # text nodes are uninteresting 1272 if($treelet->[$i][0] eq 'L') { 1273 # SPECIAL STUFF for semi-processed L<>'s 1274 1275 my $thing; 1276 foreach my $attrname ('section', 'to') { 1277 if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) { 1278 unshift @stack, $thing; 1279 DEBUG > 2 and print " Enqueuing ", 1280 pretty( $treelet->[$i][1]{$attrname} ), 1281 " as an attribute value to tweak.\n"; 1282 } 1283 } 1284 1285 unshift @stack, $treelet->[$i]; # recurse 1286 next; 1287 } elsif($treelet->[$i][0] ne 'E') { 1288 unshift @stack, $treelet->[$i]; # recurse 1289 next; 1290 } 1291 1292 DEBUG > 1 and print "Ogling E node ", pretty($treelet->[$i]), "\n"; 1293 1294 # bitch if it's empty 1295 if( @{$treelet->[$i]} == 2 1296 or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '') 1297 ) { 1298 $self->whine( $start_line, "An empty E<>" ); 1299 $treelet->[$i] = 'E<>'; # splice in a literal 1300 next; 1301 } 1302 1303 # bitch if content is weird 1304 unless(@{$treelet->[$i]} == 3 and !ref($content = $treelet->[$i][2])) { 1305 $self->whine( $start_line, "An E<...> surrounding strange content" ); 1306 $replacer = $treelet->[$i]; # scratch 1307 splice(@$treelet, $i, 1, # fake out a literal 1308 'E<', 1309 splice(@$replacer,2), # promote its content 1310 '>' 1311 ); 1312 # Don't need to do --$i, as the 'E<' we just added isn't interesting. 1313 next; 1314 } 1315 1316 DEBUG > 1 and print "Ogling E<$content>\n"; 1317 1318 $charnum = Pod::Escapes::e2charnum($content); 1319 DEBUG > 1 and print " Considering E<$content> with char ", 1320 defined($charnum) ? $charnum : "undef", ".\n"; 1321 1322 if(!defined( $charnum )) { 1323 DEBUG > 1 and print "I don't know how to deal with E<$content>.\n"; 1324 $self->whine( $start_line, "Unknown E content in E<$content>" ); 1325 $replacer = "E<$content>"; # better than nothing 1326 } elsif($charnum >= 255 and !UNICODE) { 1327 $replacer = ASCII ? "\xA4" : "?"; 1328 DEBUG > 1 and print "This Perl version can't handle ", 1329 "E<$content> (chr $charnum), so replacing with $replacer\n"; 1330 } else { 1331 $replacer = Pod::Escapes::e2char($content); 1332 DEBUG > 1 and print " Replacing E<$content> with $replacer\n"; 1333 } 1334 1335 splice(@$treelet, $i, 1, $replacer); # no need to back up $i, tho 1336 } 1337 } 1338 1339 return; 1340 } 1341 1342 1343 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1344 1345 sub _treat_Ss { 1346 my($self,$treelet) = @_; 1347 1348 _change_S_to_nbsp($treelet,0) if $self->{'nbsp_for_S'}; 1349 1350 # TODO: or a change_nbsp_to_S 1351 # Normalizing nbsp's to S is harder: for each text node, make S content 1352 # out of anything matching m/([^ \xA0]*(?:\xA0+[^ \xA0]*)+)/ 1353 1354 1355 return; 1356 } 1357 1358 1359 sub _change_S_to_nbsp { # a recursive function 1360 # Sanely assumes that the top node in the excursion won't be an S node. 1361 my($treelet, $in_s) = @_; 1362 1363 my $is_s = ('S' eq $treelet->[0]); 1364 $in_s ||= $is_s; # So in_s is on either by this being an S element, 1365 # or by an ancestor being an S element. 1366 1367 for(my $i = 2; $i < @$treelet; ++$i) { 1368 if(ref $treelet->[$i]) { 1369 if( _change_S_to_nbsp( $treelet->[$i], $in_s ) ) { 1370 my $to_pull_up = $treelet->[$i]; 1371 splice @$to_pull_up,0,2; # ...leaving just its content 1372 splice @$treelet, $i, 1, @$to_pull_up; # Pull up content 1373 $i += @$to_pull_up - 1; # Make $i skip the pulled-up stuff 1374 } 1375 } else { 1376 $treelet->[$i] =~ s/\s/\xA0/g if ASCII and $in_s; 1377 # (If not in ASCIIland, we can't assume that \xA0 == nbsp.) 1378 1379 # Note that if you apply nbsp_for_S to text, and so turn 1380 # "foo S<bar baz> quux" into "foo bar faz quux", you 1381 # end up with something that fails to say "and don't hyphenate 1382 # any part of 'bar baz'". However, hyphenation is such a vexing 1383 # problem anyway, that most Pod renderers just don't render it 1384 # at all. But if you do want to implement hyphenation, I guess 1385 # that you'd better have nbsp_for_S off. 1386 } 1387 } 1388 1389 return $is_s; 1390 } 1391 1392 #----------------------------------------------------------------------------- 1393 1394 sub _accessorize { # A simple-minded method-maker 1395 no strict 'refs'; 1396 foreach my $attrname (@_) { 1397 next if $attrname =~ m/::/; # a hack 1398 *{caller() . '::' . $attrname} = sub { 1399 use strict; 1400 $Carp::CarpLevel = 1, Carp::croak( 1401 "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)" 1402 ) unless (@_ == 1 or @_ == 2) and ref $_[0]; 1403 (@_ == 1) ? $_[0]->{$attrname} 1404 : ($_[0]->{$attrname} = $_[1]); 1405 }; 1406 } 1407 # Ya know, they say accessories make the ensemble! 1408 return; 1409 } 1410 1411 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1412 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 1413 #============================================================================= 1414 1415 sub filter { 1416 my($class, $source) = @_; 1417 my $new = $class->new; 1418 $new->output_fh(*STDOUT{IO}); 1419 1420 if(ref($source || '') eq 'SCALAR') { 1421 $new->parse_string_document( $$source ); 1422 } elsif(ref($source)) { # it's a file handle 1423 $new->parse_file($source); 1424 } else { # it's a filename 1425 $new->parse_file($source); 1426 } 1427 1428 return $new; 1429 } 1430 1431 1432 #----------------------------------------------------------------------------- 1433 1434 sub _out { 1435 # For use in testing: Class->_out($source) 1436 # returns the transformation of $source 1437 1438 my $class = shift(@_); 1439 1440 my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE'; 1441 1442 DEBUG and print "\n\n", '#' x 76, 1443 "\nAbout to parse source: {{\n$_[0]\n}}\n\n"; 1444 1445 1446 my $parser = $class->new; 1447 $parser->hide_line_numbers(1); 1448 1449 my $out = ''; 1450 $parser->output_string( \$out ); 1451 DEBUG and print " _out to ", \$out, "\n"; 1452 1453 $mutor->($parser) if $mutor; 1454 1455 $parser->parse_string_document( $_[0] ); 1456 # use Data::Dumper; print Dumper($parser), "\n"; 1457 return $out; 1458 } 1459 1460 1461 sub _duo { 1462 # For use in testing: Class->_duo($source1, $source2) 1463 # returns the parse trees of $source1 and $source2. 1464 # Good in things like: &ok( Class->duo(... , ...) ); 1465 1466 my $class = shift(@_); 1467 1468 Carp::croak "But $class->_duo is useful only in list context!" 1469 unless wantarray; 1470 1471 my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE'; 1472 1473 Carp::croak "But $class->_duo takes two parameters, not: @_" 1474 unless @_ == 2; 1475 1476 my(@out); 1477 1478 while( @_ ) { 1479 my $parser = $class->new; 1480 1481 push @out, ''; 1482 $parser->output_string( \( $out[-1] ) ); 1483 1484 DEBUG and print " _duo out to ", $parser->output_string(), 1485 " = $parser->{'output_string'}\n"; 1486 1487 $parser->hide_line_numbers(1); 1488 $mutor->($parser) if $mutor; 1489 $parser->parse_string_document( shift( @_ ) ); 1490 # use Data::Dumper; print Dumper($parser), "\n"; 1491 } 1492 1493 return @out; 1494 } 1495 1496 1497 1498 #----------------------------------------------------------------------------- 1499 1; 1500 __END__ 1501 1502 TODO: 1503 A start_formatting_code and end_formatting_code methods, which in the 1504 base class call start_L, end_L, start_C, end_C, etc., if they are 1505 defined. 1506 1507 have the POD FORMATTING ERRORS section note the localtime, and the 1508 version of Pod::Simple. 1509 1510 option to delete all E<shy>s? 1511 option to scream if under-0x20 literals are found in the input, or 1512 under-E<32> E codes are found in the tree. And ditto \x7f-\x9f 1513 1514 Option to turn highbit characters into their compromised form? (applies 1515 to E parsing too) 1516 1517 TODO: BOM/encoding things. 1518 1519 TODO: ascii-compat things in the XML classes? 1520
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |