[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 package File::Fetch; 2 3 use strict; 4 use FileHandle; 5 use File::Copy; 6 use File::Spec; 7 use File::Spec::Unix; 8 use File::Basename qw[dirname]; 9 10 use Cwd qw[cwd]; 11 use Carp qw[carp]; 12 use IPC::Cmd qw[can_run run]; 13 use File::Path qw[mkpath]; 14 use Params::Check qw[check]; 15 use Module::Load::Conditional qw[can_load]; 16 use Locale::Maketext::Simple Style => 'gettext'; 17 18 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT 19 $BLACKLIST $METHOD_FAIL $VERSION $METHODS 20 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN 21 ]; 22 23 use constant QUOTE => do { $^O eq 'MSWin32' ? q["] : q['] }; 24 25 26 $VERSION = '0.14'; 27 $VERSION = eval $VERSION; # avoid warnings with development releases 28 $PREFER_BIN = 0; # XXX TODO implement 29 $FROM_EMAIL = 'File-Fetch@example.com'; 30 $USER_AGENT = 'File::Fetch/$VERSION'; 31 $BLACKLIST = [qw|ftp|]; 32 $METHOD_FAIL = { }; 33 $FTP_PASSIVE = 1; 34 $TIMEOUT = 0; 35 $DEBUG = 0; 36 $WARN = 1; 37 38 ### methods available to fetch the file depending on the scheme 39 $METHODS = { 40 http => [ qw|lwp wget curl lynx| ], 41 ftp => [ qw|lwp netftp wget curl ncftp ftp| ], 42 file => [ qw|lwp file| ], 43 rsync => [ qw|rsync| ] 44 }; 45 46 ### silly warnings ### 47 local $Params::Check::VERBOSE = 1; 48 local $Params::Check::VERBOSE = 1; 49 local $Module::Load::Conditional::VERBOSE = 0; 50 local $Module::Load::Conditional::VERBOSE = 0; 51 52 ### see what OS we are on, important for file:// uris ### 53 use constant ON_WIN => ($^O eq 'MSWin32'); 54 use constant ON_VMS => ($^O eq 'VMS'); 55 use constant ON_UNIX => (!ON_WIN); 56 use constant HAS_VOL => (ON_WIN); 57 use constant HAS_SHARE => (ON_WIN); 58 =pod 59 60 =head1 NAME 61 62 File::Fetch - A generic file fetching mechanism 63 64 =head1 SYNOPSIS 65 66 use File::Fetch; 67 68 ### build a File::Fetch object ### 69 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); 70 71 ### fetch the uri to cwd() ### 72 my $where = $ff->fetch() or die $ff->error; 73 74 ### fetch the uri to /tmp ### 75 my $where = $ff->fetch( to => '/tmp' ); 76 77 ### parsed bits from the uri ### 78 $ff->uri; 79 $ff->scheme; 80 $ff->host; 81 $ff->path; 82 $ff->file; 83 84 =head1 DESCRIPTION 85 86 File::Fetch is a generic file fetching mechanism. 87 88 It allows you to fetch any file pointed to by a C<ftp>, C<http>, 89 C<file>, or C<rsync> uri by a number of different means. 90 91 See the C<HOW IT WORKS> section further down for details. 92 93 =head1 ACCESSORS 94 95 A C<File::Fetch> object has the following accessors 96 97 =over 4 98 99 =item $ff->uri 100 101 The uri you passed to the constructor 102 103 =item $ff->scheme 104 105 The scheme from the uri (like 'file', 'http', etc) 106 107 =item $ff->host 108 109 The hostname in the uri. Will be empty if host was originally 110 'localhost' for a 'file://' url. 111 112 =item $ff->vol 113 114 On operating systems with the concept of a volume the second element 115 of a file:// is considered to the be volume specification for the file. 116 Thus on Win32 this routine returns the volume, on other operating 117 systems this returns nothing. 118 119 On Windows this value may be empty if the uri is to a network share, in 120 which case the 'share' property will be defined. Additionally, volume 121 specifications that use '|' as ':' will be converted on read to use ':'. 122 123 On VMS, which has a volume concept, this field will be empty because VMS 124 file specifications are converted to absolute UNIX format and the volume 125 information is transparently included. 126 127 =item $ff->share 128 129 On systems with the concept of a network share (currently only Windows) returns 130 the sharename from a file://// url. On other operating systems returns empty. 131 132 =item $ff->path 133 134 The path from the uri, will be at least a single '/'. 135 136 =item $ff->file 137 138 The name of the remote file. For the local file name, the 139 result of $ff->output_file will be used. 140 141 =cut 142 143 144 ########################## 145 ### Object & Accessors ### 146 ########################## 147 148 { 149 ### template for new() and autogenerated accessors ### 150 my $Tmpl = { 151 scheme => { default => 'http' }, 152 host => { default => 'localhost' }, 153 path => { default => '/' }, 154 file => { required => 1 }, 155 uri => { required => 1 }, 156 vol => { default => '' }, # windows for file:// uris 157 share => { default => '' }, # windows for file:// uris 158 _error_msg => { no_override => 1 }, 159 _error_msg_long => { no_override => 1 }, 160 }; 161 162 for my $method ( keys %$Tmpl ) { 163 no strict 'refs'; 164 *$method = sub { 165 my $self = shift; 166 $self->{$method} = $_[0] if @_; 167 return $self->{$method}; 168 } 169 } 170 171 sub _create { 172 my $class = shift; 173 my %hash = @_; 174 175 my $args = check( $Tmpl, \%hash ) or return; 176 177 bless $args, $class; 178 179 if( lc($args->scheme) ne 'file' and not $args->host ) { 180 return File::Fetch->_error(loc( 181 "Hostname required when fetching from '%1'",$args->scheme)); 182 } 183 184 for (qw[path file]) { 185 unless( $args->$_() ) { # 5.5.x needs the () 186 return File::Fetch->_error(loc("No '%1' specified",$_)); 187 } 188 } 189 190 return $args; 191 } 192 } 193 194 =item $ff->output_file 195 196 The name of the output file. This is the same as $ff->file, 197 but any query parameters are stripped off. For example: 198 199 http://example.com/index.html?x=y 200 201 would make the output file be C<index.html> rather than 202 C<index.html?x=y>. 203 204 =back 205 206 =cut 207 208 sub output_file { 209 my $self = shift; 210 my $file = $self->file; 211 212 $file =~ s/\?.*$//g; 213 214 return $file; 215 } 216 217 ### XXX do this or just point to URI::Escape? 218 # =head2 $esc_uri = $ff->escaped_uri 219 # 220 # =cut 221 # 222 # ### most of this is stolen straight from URI::escape 223 # { ### Build a char->hex map 224 # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; 225 # 226 # sub escaped_uri { 227 # my $self = shift; 228 # my $uri = $self->uri; 229 # 230 # ### Default unsafe characters. RFC 2732 ^(uric - reserved) 231 # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ 232 # $escapes{$1} || $self->_fail_hi($1)/ge; 233 # 234 # return $uri; 235 # } 236 # 237 # sub _fail_hi { 238 # my $self = shift; 239 # my $char = shift; 240 # 241 # $self->_error(loc( 242 # "Can't escape '%1', try using the '%2' module instead", 243 # sprintf("\\x{%04X}", ord($char)), 'URI::Escape' 244 # )); 245 # } 246 # 247 # sub output_file { 248 # 249 # } 250 # 251 # 252 # } 253 254 =head1 METHODS 255 256 =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); 257 258 Parses the uri and creates a corresponding File::Fetch::Item object, 259 that is ready to be C<fetch>ed and returns it. 260 261 Returns false on failure. 262 263 =cut 264 265 sub new { 266 my $class = shift; 267 my %hash = @_; 268 269 my ($uri); 270 my $tmpl = { 271 uri => { required => 1, store => \$uri }, 272 }; 273 274 check( $tmpl, \%hash ) or return; 275 276 ### parse the uri to usable parts ### 277 my $href = __PACKAGE__->_parse_uri( $uri ) or return; 278 279 ### make it into a FFI object ### 280 my $ff = File::Fetch->_create( %$href ) or return; 281 282 283 ### return the object ### 284 return $ff; 285 } 286 287 ### parses an uri to a hash structure: 288 ### 289 ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) 290 ### 291 ### becomes: 292 ### 293 ### $href = { 294 ### scheme => 'ftp', 295 ### host => 'ftp.cpan.org', 296 ### path => '/pub/mirror', 297 ### file => 'index.html' 298 ### }; 299 ### 300 ### In the case of file:// urls there maybe be additional fields 301 ### 302 ### For systems with volume specifications such as Win32 there will be 303 ### a volume specifier provided in the 'vol' field. 304 ### 305 ### 'vol' => 'volumename' 306 ### 307 ### For windows file shares there may be a 'share' key specified 308 ### 309 ### 'share' => 'sharename' 310 ### 311 ### Note that the rules of what a file:// url means vary by the operating system 312 ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious 313 ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and 314 ### not '/foo/bar.txt' 315 ### 316 ### Similarly if the host interpreting the url is VMS then 317 ### file:///disk$user/my/notes/note12345.txt' means 318 ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as 319 ### if it is unix where it means /disk$user/my/notes/note12345.txt'. 320 ### Except for some cases in the File::Spec methods, Perl on VMS will generally 321 ### handle UNIX format file specifications. 322 ### 323 ### This means it is impossible to serve certain file:// urls on certain systems. 324 ### 325 ### Thus are the problems with a protocol-less specification. :-( 326 ### 327 328 sub _parse_uri { 329 my $self = shift; 330 my $uri = shift or return; 331 332 my $href = { uri => $uri }; 333 334 ### find the scheme ### 335 $uri =~ s|^(\w+)://||; 336 $href->{scheme} = $1; 337 338 ### See rfc 1738 section 3.10 339 ### http://www.faqs.org/rfcs/rfc1738.html 340 ### And wikipedia for more on windows file:// urls 341 ### http://en.wikipedia.org/wiki/File:// 342 if( $href->{scheme} eq 'file' ) { 343 344 my @parts = split '/',$uri; 345 346 ### file://hostname/... 347 ### file://hostname/... 348 ### normalize file://localhost with file:/// 349 $href->{host} = $parts[0] || ''; 350 351 ### index in @parts where the path components begin; 352 my $index = 1; 353 354 ### file:////hostname/sharename/blah.txt 355 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { 356 357 $href->{host} = $parts[2] || ''; # avoid warnings 358 $href->{share} = $parts[3] || ''; # avoid warnings 359 360 $index = 4 # index after the share 361 362 ### file:///D|/blah.txt 363 ### file:///D:/blah.txt 364 } elsif (HAS_VOL) { 365 366 ### this code comes from dmq's patch, but: 367 ### XXX if volume is empty, wouldn't that be an error? --kane 368 ### if so, our file://localhost test needs to be fixed as wel 369 $href->{vol} = $parts[1] || ''; 370 371 ### correct D| style colume descriptors 372 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; 373 374 $index = 2; # index after the volume 375 } 376 377 ### rebuild the path from the leftover parts; 378 $href->{path} = join '/', '', splice( @parts, $index, $#parts ); 379 380 } else { 381 ### using anything but qw() in hash slices may produce warnings 382 ### in older perls :-( 383 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s; 384 } 385 386 ### split the path into file + dir ### 387 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); 388 $href->{path} = $parts[1]; 389 $href->{file} = $parts[2]; 390 } 391 392 ### host will be empty if the target was 'localhost' and the 393 ### scheme was 'file' 394 $href->{host} = '' if ($href->{host} eq 'localhost') and 395 ($href->{scheme} eq 'file'); 396 397 return $href; 398 } 399 400 =head2 $ff->fetch( [to => /my/output/dir/] ) 401 402 Fetches the file you requested. By default it writes to C<cwd()>, 403 but you can override that by specifying the C<to> argument. 404 405 Returns the full path to the downloaded file on success, and false 406 on failure. 407 408 =cut 409 410 sub fetch { 411 my $self = shift or return; 412 my %hash = @_; 413 414 my $to; 415 my $tmpl = { 416 to => { default => cwd(), store => \$to }, 417 }; 418 419 check( $tmpl, \%hash ) or return; 420 421 ### On VMS force to VMS format so File::Spec will work. 422 $to = VMS::Filespec::vmspath($to) if ON_VMS; 423 424 ### create the path if it doesn't exist yet ### 425 unless( -d $to ) { 426 eval { mkpath( $to ) }; 427 428 return $self->_error(loc("Could not create path '%1'",$to)) if $@; 429 } 430 431 ### set passive ftp if required ### 432 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; 433 434 ### we dont use catfile on win32 because if we are using a cygwin tool 435 ### under cmd.exe they wont understand windows style separators. 436 my $out_to = ON_WIN ? $to.'/'.$self->output_file 437 : File::Spec->catfile( $to, $self->output_file ); 438 439 for my $method ( @{ $METHODS->{$self->scheme} } ) { 440 my $sub = '_'.$method.'_fetch'; 441 442 unless( __PACKAGE__->can($sub) ) { 443 $self->_error(loc("Cannot call method for '%1' -- WEIRD!", 444 $method)); 445 next; 446 } 447 448 ### method is blacklisted ### 449 next if grep { lc $_ eq $method } @$BLACKLIST; 450 451 ### method is known to fail ### 452 next if $METHOD_FAIL->{$method}; 453 454 ### there's serious issues with IPC::Run and quoting of command 455 ### line arguments. using quotes in the wrong place breaks things, 456 ### and in the case of say, 457 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document 458 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2" 459 ### it doesn't matter how you quote, it always fails. 460 local $IPC::Cmd::USE_IPC_RUN = 0; 461 462 if( my $file = $self->$sub( 463 to => $out_to 464 )){ 465 466 unless( -e $file && -s _ ) { 467 $self->_error(loc("'%1' said it fetched '%2', ". 468 "but it was not created",$method,$file)); 469 470 ### mark the failure ### 471 $METHOD_FAIL->{$method} = 1; 472 473 next; 474 475 } else { 476 477 my $abs = File::Spec->rel2abs( $file ); 478 return $abs; 479 } 480 } 481 } 482 483 484 ### if we got here, we looped over all methods, but we weren't able 485 ### to fetch it. 486 return; 487 } 488 489 ######################## 490 ### _*_fetch methods ### 491 ######################## 492 493 ### LWP fetching ### 494 sub _lwp_fetch { 495 my $self = shift; 496 my %hash = @_; 497 498 my ($to); 499 my $tmpl = { 500 to => { required => 1, store => \$to } 501 }; 502 check( $tmpl, \%hash ) or return; 503 504 ### modules required to download with lwp ### 505 my $use_list = { 506 LWP => '0.0', 507 'LWP::UserAgent' => '0.0', 508 'HTTP::Request' => '0.0', 509 'HTTP::Status' => '0.0', 510 URI => '0.0', 511 512 }; 513 514 if( can_load(modules => $use_list) ) { 515 516 ### setup the uri object 517 my $uri = URI->new( File::Spec::Unix->catfile( 518 $self->path, $self->file 519 ) ); 520 521 ### special rules apply for file:// uris ### 522 $uri->scheme( $self->scheme ); 523 $uri->host( $self->scheme eq 'file' ? '' : $self->host ); 524 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file'; 525 526 ### set up the useragent object 527 my $ua = LWP::UserAgent->new(); 528 $ua->timeout( $TIMEOUT ) if $TIMEOUT; 529 $ua->agent( $USER_AGENT ); 530 $ua->from( $FROM_EMAIL ); 531 $ua->env_proxy; 532 533 my $res = $ua->mirror($uri, $to) or return; 534 535 ### uptodate or fetched ok ### 536 if ( $res->code == 304 or $res->code == 200 ) { 537 return $to; 538 539 } else { 540 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", 541 $res->code, HTTP::Status::status_message($res->code), 542 $res->status_line)); 543 } 544 545 } else { 546 $METHOD_FAIL->{'lwp'} = 1; 547 return; 548 } 549 } 550 551 ### Net::FTP fetching 552 sub _netftp_fetch { 553 my $self = shift; 554 my %hash = @_; 555 556 my ($to); 557 my $tmpl = { 558 to => { required => 1, store => \$to } 559 }; 560 check( $tmpl, \%hash ) or return; 561 562 ### required modules ### 563 my $use_list = { 'Net::FTP' => 0 }; 564 565 if( can_load( modules => $use_list ) ) { 566 567 ### make connection ### 568 my $ftp; 569 my @options = ($self->host); 570 push(@options, Timeout => $TIMEOUT) if $TIMEOUT; 571 unless( $ftp = Net::FTP->new( @options ) ) { 572 return $self->_error(loc("Ftp creation failed: %1",$@)); 573 } 574 575 ### login ### 576 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { 577 return $self->_error(loc("Could not login to '%1'",$self->host)); 578 } 579 580 ### set binary mode, just in case ### 581 $ftp->binary; 582 583 ### create the remote path 584 ### remember remote paths are unix paths! [#11483] 585 my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); 586 587 ### fetch the file ### 588 my $target; 589 unless( $target = $ftp->get( $remote, $to ) ) { 590 return $self->_error(loc("Could not fetch '%1' from '%2'", 591 $remote, $self->host)); 592 } 593 594 ### log out ### 595 $ftp->quit; 596 597 return $target; 598 599 } else { 600 $METHOD_FAIL->{'netftp'} = 1; 601 return; 602 } 603 } 604 605 ### /bin/wget fetch ### 606 sub _wget_fetch { 607 my $self = shift; 608 my %hash = @_; 609 610 my ($to); 611 my $tmpl = { 612 to => { required => 1, store => \$to } 613 }; 614 check( $tmpl, \%hash ) or return; 615 616 ### see if we have a wget binary ### 617 if( my $wget = can_run('wget') ) { 618 619 ### no verboseness, thanks ### 620 my $cmd = [ $wget, '--quiet' ]; 621 622 ### if a timeout is set, add it ### 623 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 624 625 ### run passive if specified ### 626 push @$cmd, '--passive-ftp' if $FTP_PASSIVE; 627 628 ### set the output document, add the uri ### 629 push @$cmd, '--output-document', 630 ### DO NOT quote things for IPC::Run, it breaks stuff. 631 $IPC::Cmd::USE_IPC_RUN 632 ? ($to, $self->uri) 633 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 634 635 ### shell out ### 636 my $captured; 637 unless(run( command => $cmd, 638 buffer => \$captured, 639 verbose => $DEBUG 640 )) { 641 ### wget creates the output document always, even if the fetch 642 ### fails.. so unlink it in that case 643 1 while unlink $to; 644 645 return $self->_error(loc( "Command failed: %1", $captured || '' )); 646 } 647 648 return $to; 649 650 } else { 651 $METHOD_FAIL->{'wget'} = 1; 652 return; 653 } 654 } 655 656 657 ### /bin/ftp fetch ### 658 sub _ftp_fetch { 659 my $self = shift; 660 my %hash = @_; 661 662 my ($to); 663 my $tmpl = { 664 to => { required => 1, store => \$to } 665 }; 666 check( $tmpl, \%hash ) or return; 667 668 ### see if we have a ftp binary ### 669 if( my $ftp = can_run('ftp') ) { 670 671 my $fh = FileHandle->new; 672 673 local $SIG{CHLD} = 'IGNORE'; 674 675 unless ($fh->open("|$ftp -n")) { 676 return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); 677 } 678 679 my @dialog = ( 680 "lcd " . dirname($to), 681 "open " . $self->host, 682 "user anonymous $FROM_EMAIL", 683 "cd /", 684 "cd " . $self->path, 685 "binary", 686 "get " . $self->file . " " . $self->output_file, 687 "quit", 688 ); 689 690 foreach (@dialog) { $fh->print($_, "\n") } 691 $fh->close or return; 692 693 return $to; 694 } 695 } 696 697 ### lynx is stupid - it decompresses any .gz file it finds to be text 698 ### use /bin/lynx to fetch files 699 sub _lynx_fetch { 700 my $self = shift; 701 my %hash = @_; 702 703 my ($to); 704 my $tmpl = { 705 to => { required => 1, store => \$to } 706 }; 707 check( $tmpl, \%hash ) or return; 708 709 ### see if we have a lynx binary ### 710 if( my $lynx = can_run('lynx') ) { 711 712 unless( IPC::Cmd->can_capture_buffer ) { 713 $METHOD_FAIL->{'lynx'} = 1; 714 715 return $self->_error(loc( 716 "Can not capture buffers. Can not use '%1' to fetch files", 717 'lynx' )); 718 } 719 720 ### write to the output file ourselves, since lynx ass_u_mes to much 721 my $local = FileHandle->new(">$to") 722 or return $self->_error(loc( 723 "Could not open '%1' for writing: %2",$to,$!)); 724 725 ### dump to stdout ### 726 my $cmd = [ 727 $lynx, 728 '-source', 729 "-auth=anonymous:$FROM_EMAIL", 730 ]; 731 732 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; 733 734 ### DO NOT quote things for IPC::Run, it breaks stuff. 735 push @$cmd, $IPC::Cmd::USE_IPC_RUN 736 ? $self->uri 737 : QUOTE. $self->uri .QUOTE; 738 739 740 ### shell out ### 741 my $captured; 742 unless(run( command => $cmd, 743 buffer => \$captured, 744 verbose => $DEBUG ) 745 ) { 746 return $self->_error(loc("Command failed: %1", $captured || '')); 747 } 748 749 ### print to local file ### 750 ### XXX on a 404 with a special error page, $captured will actually 751 ### hold the contents of that page, and make it *appear* like the 752 ### request was a success, when really it wasn't :( 753 ### there doesn't seem to be an option for lynx to change the exit 754 ### code based on a 4XX status or so. 755 ### the closest we can come is using --error_file and parsing that, 756 ### which is very unreliable ;( 757 $local->print( $captured ); 758 $local->close or return; 759 760 return $to; 761 762 } else { 763 $METHOD_FAIL->{'lynx'} = 1; 764 return; 765 } 766 } 767 768 ### use /bin/ncftp to fetch files 769 sub _ncftp_fetch { 770 my $self = shift; 771 my %hash = @_; 772 773 my ($to); 774 my $tmpl = { 775 to => { required => 1, store => \$to } 776 }; 777 check( $tmpl, \%hash ) or return; 778 779 ### we can only set passive mode in interactive sesssions, so bail out 780 ### if $FTP_PASSIVE is set 781 return if $FTP_PASSIVE; 782 783 ### see if we have a ncftp binary ### 784 if( my $ncftp = can_run('ncftp') ) { 785 786 my $cmd = [ 787 $ncftp, 788 '-V', # do not be verbose 789 '-p', $FROM_EMAIL, # email as password 790 $self->host, # hostname 791 dirname($to), # local dir for the file 792 # remote path to the file 793 ### DO NOT quote things for IPC::Run, it breaks stuff. 794 $IPC::Cmd::USE_IPC_RUN 795 ? File::Spec::Unix->catdir( $self->path, $self->file ) 796 : QUOTE. File::Spec::Unix->catdir( 797 $self->path, $self->file ) .QUOTE 798 799 ]; 800 801 ### shell out ### 802 my $captured; 803 unless(run( command => $cmd, 804 buffer => \$captured, 805 verbose => $DEBUG ) 806 ) { 807 return $self->_error(loc("Command failed: %1", $captured || '')); 808 } 809 810 return $to; 811 812 } else { 813 $METHOD_FAIL->{'ncftp'} = 1; 814 return; 815 } 816 } 817 818 ### use /bin/curl to fetch files 819 sub _curl_fetch { 820 my $self = shift; 821 my %hash = @_; 822 823 my ($to); 824 my $tmpl = { 825 to => { required => 1, store => \$to } 826 }; 827 check( $tmpl, \%hash ) or return; 828 829 if (my $curl = can_run('curl')) { 830 831 ### these long opts are self explanatory - I like that -jmb 832 my $cmd = [ $curl ]; 833 834 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; 835 836 push(@$cmd, '--silent') unless $DEBUG; 837 838 ### curl does the right thing with passive, regardless ### 839 if ($self->scheme eq 'ftp') { 840 push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); 841 } 842 843 ### curl doesn't follow 302 (temporarily moved) etc automatically 844 ### so we add --location to enable that. 845 push @$cmd, '--fail', '--location', '--output', 846 ### DO NOT quote things for IPC::Run, it breaks stuff. 847 $IPC::Cmd::USE_IPC_RUN 848 ? ($to, $self->uri) 849 : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); 850 851 my $captured; 852 unless(run( command => $cmd, 853 buffer => \$captured, 854 verbose => $DEBUG ) 855 ) { 856 857 return $self->_error(loc("Command failed: %1", $captured || '')); 858 } 859 860 return $to; 861 862 } else { 863 $METHOD_FAIL->{'curl'} = 1; 864 return; 865 } 866 } 867 868 869 ### use File::Copy for fetching file:// urls ### 870 ### 871 ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html) 872 ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) 873 ### 874 875 sub _file_fetch { 876 my $self = shift; 877 my %hash = @_; 878 879 my ($to); 880 my $tmpl = { 881 to => { required => 1, store => \$to } 882 }; 883 check( $tmpl, \%hash ) or return; 884 885 886 887 ### prefix a / on unix systems with a file uri, since it would 888 ### look somewhat like this: 889 ### file:///home/kane/file 890 ### wheras windows file uris for 'c:\some\dir\file' might look like: 891 ### file:///C:/some/dir/file 892 ### file:///C|/some/dir/file 893 ### or for a network share '\\host\share\some\dir\file': 894 ### file:////host/share/some/dir/file 895 ### 896 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: 897 ### file://vms.host.edu/disk$user/my/notes/note12345.txt 898 ### 899 900 my $path = $self->path; 901 my $vol = $self->vol; 902 my $share = $self->share; 903 904 my $remote; 905 if (!$share and $self->host) { 906 return $self->_error(loc( 907 "Currently %1 cannot handle hosts in %2 urls", 908 'File::Fetch', 'file://' 909 )); 910 } 911 912 if( $vol ) { 913 $path = File::Spec->catdir( split /\//, $path ); 914 $remote = File::Spec->catpath( $vol, $path, $self->file); 915 916 } elsif( $share ) { 917 ### win32 specific, and a share name, so we wont bother with File::Spec 918 $path =~ s|/+|\\|g; 919 $remote = "\\\\".$self->host."\\$share\\$path"; 920 921 } else { 922 ### File::Spec on VMS can not currently handle UNIX syntax. 923 my $file_class = ON_VMS 924 ? 'File::Spec::Unix' 925 : 'File::Spec'; 926 927 $remote = $file_class->catfile( $path, $self->file ); 928 } 929 930 ### File::Copy is littered with 'die' statements :( ### 931 my $rv = eval { File::Copy::copy( $remote, $to ) }; 932 933 ### something went wrong ### 934 if( !$rv or $@ ) { 935 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", 936 $remote, $to, $!, $@)); 937 } 938 939 return $to; 940 } 941 942 ### use /usr/bin/rsync to fetch files 943 sub _rsync_fetch { 944 my $self = shift; 945 my %hash = @_; 946 947 my ($to); 948 my $tmpl = { 949 to => { required => 1, store => \$to } 950 }; 951 check( $tmpl, \%hash ) or return; 952 953 if (my $rsync = can_run('rsync')) { 954 955 my $cmd = [ $rsync ]; 956 957 ### XXX: rsync has no I/O timeouts at all, by default 958 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; 959 960 push(@$cmd, '--quiet') unless $DEBUG; 961 962 ### DO NOT quote things for IPC::Run, it breaks stuff. 963 push @$cmd, $IPC::Cmd::USE_IPC_RUN 964 ? ($self->uri, $to) 965 : (QUOTE. $self->uri .QUOTE, QUOTE. $to .QUOTE); 966 967 my $captured; 968 unless(run( command => $cmd, 969 buffer => \$captured, 970 verbose => $DEBUG ) 971 ) { 972 973 return $self->_error(loc("Command %1 failed: %2", 974 "@$cmd" || '', $captured || '')); 975 } 976 977 return $to; 978 979 } else { 980 $METHOD_FAIL->{'rsync'} = 1; 981 return; 982 } 983 } 984 985 ################################# 986 # 987 # Error code 988 # 989 ################################# 990 991 =pod 992 993 =head2 $ff->error([BOOL]) 994 995 Returns the last encountered error as string. 996 Pass it a true value to get the C<Carp::longmess()> output instead. 997 998 =cut 999 1000 ### error handling the way Archive::Extract does it 1001 sub _error { 1002 my $self = shift; 1003 my $error = shift; 1004 1005 $self->_error_msg( $error ); 1006 $self->_error_msg_long( Carp::longmess($error) ); 1007 1008 if( $WARN ) { 1009 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; 1010 } 1011 1012 return; 1013 } 1014 1015 sub error { 1016 my $self = shift; 1017 return shift() ? $self->_error_msg_long : $self->_error_msg; 1018 } 1019 1020 1021 1; 1022 1023 =pod 1024 1025 =head1 HOW IT WORKS 1026 1027 File::Fetch is able to fetch a variety of uris, by using several 1028 external programs and modules. 1029 1030 Below is a mapping of what utilities will be used in what order 1031 for what schemes, if available: 1032 1033 file => LWP, file 1034 http => LWP, wget, curl, lynx 1035 ftp => LWP, Net::FTP, wget, curl, ncftp, ftp 1036 rsync => rsync 1037 1038 If you'd like to disable the use of one or more of these utilities 1039 and/or modules, see the C<$BLACKLIST> variable further down. 1040 1041 If a utility or module isn't available, it will be marked in a cache 1042 (see the C<$METHOD_FAIL> variable further down), so it will not be 1043 tried again. The C<fetch> method will only fail when all options are 1044 exhausted, and it was not able to retrieve the file. 1045 1046 A special note about fetching files from an ftp uri: 1047 1048 By default, all ftp connections are done in passive mode. To change 1049 that, see the C<$FTP_PASSIVE> variable further down. 1050 1051 Furthermore, ftp uris only support anonymous connections, so no 1052 named user/password pair can be passed along. 1053 1054 C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable 1055 further down. 1056 1057 =head1 GLOBAL VARIABLES 1058 1059 The behaviour of File::Fetch can be altered by changing the following 1060 global variables: 1061 1062 =head2 $File::Fetch::FROM_EMAIL 1063 1064 This is the email address that will be sent as your anonymous ftp 1065 password. 1066 1067 Default is C<File-Fetch@example.com>. 1068 1069 =head2 $File::Fetch::USER_AGENT 1070 1071 This is the useragent as C<LWP> will report it. 1072 1073 Default is C<File::Fetch/$VERSION>. 1074 1075 =head2 $File::Fetch::FTP_PASSIVE 1076 1077 This variable controls whether the environment variable C<FTP_PASSIVE> 1078 and any passive switches to commandline tools will be set to true. 1079 1080 Default value is 1. 1081 1082 Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch 1083 files, since passive mode can only be set interactively for this binary 1084 1085 =head2 $File::Fetch::TIMEOUT 1086 1087 When set, controls the network timeout (counted in seconds). 1088 1089 Default value is 0. 1090 1091 =head2 $File::Fetch::WARN 1092 1093 This variable controls whether errors encountered internally by 1094 C<File::Fetch> should be C<carp>'d or not. 1095 1096 Set to false to silence warnings. Inspect the output of the C<error()> 1097 method manually to see what went wrong. 1098 1099 Defaults to C<true>. 1100 1101 =head2 $File::Fetch::DEBUG 1102 1103 This enables debugging output when calling commandline utilities to 1104 fetch files. 1105 This also enables C<Carp::longmess> errors, instead of the regular 1106 C<carp> errors. 1107 1108 Good for tracking down why things don't work with your particular 1109 setup. 1110 1111 Default is 0. 1112 1113 =head2 $File::Fetch::BLACKLIST 1114 1115 This is an array ref holding blacklisted modules/utilities for fetching 1116 files with. 1117 1118 To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could 1119 set $File::Fetch::BLACKLIST to: 1120 1121 $File::Fetch::BLACKLIST = [qw|lwp netftp|] 1122 1123 The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable. 1124 1125 See the note on C<MAPPING> below. 1126 1127 =head2 $File::Fetch::METHOD_FAIL 1128 1129 This is a hashref registering what modules/utilities were known to fail 1130 for fetching files (mostly because they weren't installed). 1131 1132 You can reset this cache by assigning an empty hashref to it, or 1133 individually remove keys. 1134 1135 See the note on C<MAPPING> below. 1136 1137 =head1 MAPPING 1138 1139 1140 Here's a quick mapping for the utilities/modules, and their names for 1141 the $BLACKLIST, $METHOD_FAIL and other internal functions. 1142 1143 LWP => lwp 1144 Net::FTP => netftp 1145 wget => wget 1146 lynx => lynx 1147 ncftp => ncftp 1148 ftp => ftp 1149 curl => curl 1150 rsync => rsync 1151 1152 =head1 FREQUENTLY ASKED QUESTIONS 1153 1154 =head2 So how do I use a proxy with File::Fetch? 1155 1156 C<File::Fetch> currently only supports proxies with LWP::UserAgent. 1157 You will need to set your environment variables accordingly. For 1158 example, to use an ftp proxy: 1159 1160 $ENV{ftp_proxy} = 'foo.com'; 1161 1162 Refer to the LWP::UserAgent manpage for more details. 1163 1164 =head2 I used 'lynx' to fetch a file, but its contents is all wrong! 1165 1166 C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>, 1167 which we in turn capture. If that content is a 'custom' error file 1168 (like, say, a C<404 handler>), you will get that contents instead. 1169 1170 Sadly, C<lynx> doesn't support any options to return a different exit 1171 code on non-C<200 OK> status, giving us no way to tell the difference 1172 between a 'successfull' fetch and a custom error page. 1173 1174 Therefor, we recommend to only use C<lynx> as a last resort. This is 1175 why it is at the back of our list of methods to try as well. 1176 1177 =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? 1178 1179 C<File::Fetch> is relatively smart about things. When trying to write 1180 a file to disk, it removes the C<query parameters> (see the 1181 C<output_file> method for details) from the file name before creating 1182 it. In most cases this suffices. 1183 1184 If you have any other characters you need to escape, please install 1185 the C<URI::Escape> module from CPAN, and pre-encode your URI before 1186 passing it to C<File::Fetch>. You can read about the details of URIs 1187 and URI encoding here: 1188 1189 http://www.faqs.org/rfcs/rfc2396.html 1190 1191 =head1 TODO 1192 1193 =over 4 1194 1195 =item Implement $PREFER_BIN 1196 1197 To indicate to rather use commandline tools than modules 1198 1199 =back 1200 1201 =head1 BUG REPORTS 1202 1203 Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>. 1204 1205 =head1 AUTHOR 1206 1207 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 1208 1209 =head1 COPYRIGHT 1210 1211 This library is free software; you may redistribute and/or modify it 1212 under the same terms as Perl itself. 1213 1214 1215 =cut 1216 1217 # Local variables: 1218 # c-indentation-style: bsd 1219 # c-basic-offset: 4 1220 # indent-tabs-mode: nil 1221 # End: 1222 # vim: expandtab shiftwidth=4: 1223 1224 1225 1226
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |