[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
1 package File::GlobMapper; 2 3 use strict; 4 use warnings; 5 use Carp; 6 7 our ($CSH_GLOB); 8 9 BEGIN 10 { 11 if ($] < 5.006) 12 { 13 require File::BSDGlob; import File::BSDGlob qw(:glob) ; 14 $CSH_GLOB = File::BSDGlob::GLOB_CSH() ; 15 *globber = \&File::BSDGlob::csh_glob; 16 } 17 else 18 { 19 require File::Glob; import File::Glob qw(:glob) ; 20 $CSH_GLOB = File::Glob::GLOB_CSH() ; 21 #*globber = \&File::Glob::bsd_glob; 22 *globber = \&File::Glob::csh_glob; 23 } 24 } 25 26 our ($Error); 27 28 our ($VERSION, @EXPORT_OK); 29 $VERSION = '0.000_02'; 30 @EXPORT_OK = qw( globmap ); 31 32 33 our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount); 34 $noPreBS = '(?<!\\\)' ; # no preceeding backslash 35 $metachars = '.*?[](){}'; 36 $matchMetaRE = '[' . quotemeta($metachars) . ']'; 37 38 %mapping = ( 39 '*' => '([^/]*)', 40 '?' => '([^/])', 41 '.' => '\.', 42 '[' => '([', 43 '(' => '(', 44 ')' => ')', 45 ); 46 47 %wildCount = map { $_ => 1 } qw/ * ? . { ( [ /; 48 49 sub globmap ($$;) 50 { 51 my $inputGlob = shift ; 52 my $outputGlob = shift ; 53 54 my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_) 55 or croak "globmap: $Error" ; 56 return $obj->getFileMap(); 57 } 58 59 sub new 60 { 61 my $class = shift ; 62 my $inputGlob = shift ; 63 my $outputGlob = shift ; 64 # TODO -- flags needs to default to whatever File::Glob does 65 my $flags = shift || $CSH_GLOB ; 66 #my $flags = shift ; 67 68 $inputGlob =~ s/^\s*\<\s*//; 69 $inputGlob =~ s/\s*\>\s*$//; 70 71 $outputGlob =~ s/^\s*\<\s*//; 72 $outputGlob =~ s/\s*\>\s*$//; 73 74 my %object = 75 ( InputGlob => $inputGlob, 76 OutputGlob => $outputGlob, 77 GlobFlags => $flags, 78 Braces => 0, 79 WildCount => 0, 80 Pairs => [], 81 Sigil => '#', 82 ); 83 84 my $self = bless \%object, ref($class) || $class ; 85 86 $self->_parseInputGlob() 87 or return undef ; 88 89 $self->_parseOutputGlob() 90 or return undef ; 91 92 my @inputFiles = globber($self->{InputGlob}, $flags) ; 93 94 if (GLOB_ERROR) 95 { 96 $Error = $!; 97 return undef ; 98 } 99 100 #if (whatever) 101 { 102 my $missing = grep { ! -e $_ } @inputFiles ; 103 104 if ($missing) 105 { 106 $Error = "$missing input files do not exist"; 107 return undef ; 108 } 109 } 110 111 $self->{InputFiles} = \@inputFiles ; 112 113 $self->_getFiles() 114 or return undef ; 115 116 return $self; 117 } 118 119 sub _retError 120 { 121 my $string = shift ; 122 $Error = "$string in input fileglob" ; 123 return undef ; 124 } 125 126 sub _unmatched 127 { 128 my $delimeter = shift ; 129 130 _retError("Unmatched $delimeter"); 131 return undef ; 132 } 133 134 sub _parseBit 135 { 136 my $self = shift ; 137 138 my $string = shift ; 139 140 my $out = ''; 141 my $depth = 0 ; 142 143 while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//) 144 { 145 $out .= quotemeta($1) ; 146 $out .= $mapping{$2} if defined $mapping{$2}; 147 148 ++ $self->{WildCount} if $wildCount{$2} ; 149 150 if ($2 eq ',') 151 { 152 return _unmatched "(" 153 if $depth ; 154 155 $out .= '|'; 156 } 157 elsif ($2 eq '(') 158 { 159 ++ $depth ; 160 } 161 elsif ($2 eq ')') 162 { 163 return _unmatched ")" 164 if ! $depth ; 165 166 -- $depth ; 167 } 168 elsif ($2 eq '[') 169 { 170 # TODO -- quotemeta & check no '/' 171 # TODO -- check for \] & other \ within the [] 172 $string =~ s#(.*?\])## 173 or return _unmatched "[" ; 174 $out .= "$1)" ; 175 } 176 elsif ($2 eq ']') 177 { 178 return _unmatched "]" ; 179 } 180 elsif ($2 eq '{' || $2 eq '}') 181 { 182 return _retError "Nested {} not allowed" ; 183 } 184 } 185 186 $out .= quotemeta $string; 187 188 return _unmatched "(" 189 if $depth ; 190 191 return $out ; 192 } 193 194 sub _parseInputGlob 195 { 196 my $self = shift ; 197 198 my $string = $self->{InputGlob} ; 199 my $inGlob = ''; 200 201 # Multiple concatenated *'s don't make sense 202 #$string =~ s#\*\*+#*# ; 203 204 # TODO -- Allow space to delimit patterns? 205 #my @strings = split /\s+/, $string ; 206 #for my $str (@strings) 207 my $out = ''; 208 my $depth = 0 ; 209 210 while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//) 211 { 212 $out .= quotemeta($1) ; 213 $out .= $mapping{$2} if defined $mapping{$2}; 214 ++ $self->{WildCount} if $wildCount{$2} ; 215 216 if ($2 eq '(') 217 { 218 ++ $depth ; 219 } 220 elsif ($2 eq ')') 221 { 222 return _unmatched ")" 223 if ! $depth ; 224 225 -- $depth ; 226 } 227 elsif ($2 eq '[') 228 { 229 # TODO -- quotemeta & check no '/' or '(' or ')' 230 # TODO -- check for \] & other \ within the [] 231 $string =~ s#(.*?\])## 232 or return _unmatched "["; 233 $out .= "$1)" ; 234 } 235 elsif ($2 eq ']') 236 { 237 return _unmatched "]" ; 238 } 239 elsif ($2 eq '}') 240 { 241 return _unmatched "}" ; 242 } 243 elsif ($2 eq '{') 244 { 245 # TODO -- check no '/' within the {} 246 # TODO -- check for \} & other \ within the {} 247 248 my $tmp ; 249 unless ( $string =~ s/(.*?)$noPreBS\}//) 250 { 251 return _unmatched "{"; 252 } 253 #$string =~ s#(.*?)\}##; 254 255 #my $alt = join '|', 256 # map { quotemeta $_ } 257 # split "$noPreBS,", $1 ; 258 my $alt = $self->_parseBit($1); 259 defined $alt or return 0 ; 260 $out .= "($alt)" ; 261 262 ++ $self->{Braces} ; 263 } 264 } 265 266 return _unmatched "(" 267 if $depth ; 268 269 $out .= quotemeta $string ; 270 271 272 $self->{InputGlob} =~ s/$noPreBS[\(\)]//g; 273 $self->{InputPattern} = $out ; 274 275 #print "# INPUT '$self->{InputGlob}' => '$out'\n"; 276 277 return 1 ; 278 279 } 280 281 sub _parseOutputGlob 282 { 283 my $self = shift ; 284 285 my $string = $self->{OutputGlob} ; 286 my $maxwild = $self->{WildCount}; 287 288 if ($self->{GlobFlags} & GLOB_TILDE) 289 #if (1) 290 { 291 $string =~ s{ 292 ^ ~ # find a leading tilde 293 ( # save this in $1 294 [^/] # a non-slash character 295 * # repeated 0 or more times (0 means me) 296 ) 297 }{ 298 $1 299 ? (getpwnam($1))[7] 300 : ( $ENV{HOME} || $ENV{LOGDIR} ) 301 }ex; 302 303 } 304 305 # max #1 must be == to max no of '*' in input 306 while ( $string =~ m/#(\d)/g ) 307 { 308 croak "Max wild is #$maxwild, you tried #$1" 309 if $1 > $maxwild ; 310 } 311 312 my $noPreBS = '(?<!\\\)' ; # no preceeding backslash 313 #warn "noPreBS = '$noPreBS'\n"; 314 315 #$string =~ s/${noPreBS}\$(\d)/\${$1}/g; 316 $string =~ s/$noPreBS}#(\d)/\${$1}/g; 317 $string =~ s#$noPreBS}\*#\$inFile}#g; 318 $string = '"' . $string . '"'; 319 320 #print "OUTPUT '$self->{OutputGlob}' => '$string'\n"; 321 $self->{OutputPattern} = $string ; 322 323 return 1 ; 324 } 325 326 sub _getFiles 327 { 328 my $self = shift ; 329 330 my %outInMapping = (); 331 my %inFiles = () ; 332 333 foreach my $inFile (@{ $self->{InputFiles} }) 334 { 335 next if $inFiles{$inFile} ++ ; 336 337 my $outFile = $inFile ; 338 339 if ( $inFile =~ m/$self->{InputPattern}/ ) 340 { 341 no warnings 'uninitialized'; 342 eval "\$outFile = $self->{OutputPattern};" ; 343 344 if (defined $outInMapping{$outFile}) 345 { 346 $Error = "multiple input files map to one output file"; 347 return undef ; 348 } 349 $outInMapping{$outFile} = $inFile; 350 push @{ $self->{Pairs} }, [$inFile, $outFile]; 351 } 352 } 353 354 return 1 ; 355 } 356 357 sub getFileMap 358 { 359 my $self = shift ; 360 361 return $self->{Pairs} ; 362 } 363 364 sub getHash 365 { 366 my $self = shift ; 367 368 return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ; 369 } 370 371 1; 372 373 __END__ 374 375 =head1 NAME 376 377 File::GlobMapper - Extend File Glob to Allow Input and Output Files 378 379 =head1 SYNOPSIS 380 381 use File::GlobMapper qw( globmap ); 382 383 my $aref = globmap $input => $output 384 or die $File::GlobMapper::Error ; 385 386 my $gm = new File::GlobMapper $input => $output 387 or die $File::GlobMapper::Error ; 388 389 390 =head1 DESCRIPTION 391 392 B<WARNING Alpha Release Alert!> 393 394 =over 5 395 396 =item * This code is a work in progress. 397 398 =item * There are known bugs. 399 400 =item * The interface defined here is tentative. 401 402 =item * There are portability issues. 403 404 =item * Do not use in production code. 405 406 =item * Consider yourself warned! 407 408 =back 409 410 This module needs Perl5.005 or better. 411 412 This module takes the existing C<File::Glob> module as a starting point and 413 extends it to allow new filenames to be derived from the files matched by 414 C<File::Glob>. 415 416 This can be useful when carrying out batch operations on multiple files that 417 have both an input filename and output filename and the output file can be 418 derived from the input filename. Examples of operations where this can be 419 useful include, file renaming, file copying and file compression. 420 421 422 =head2 Behind The Scenes 423 424 To help explain what C<File::GlobMapper> does, consider what code you 425 would write if you wanted to rename all files in the current directory 426 that ended in C<.tar.gz> to C<.tgz>. So say these files are in the 427 current directory 428 429 alpha.tar.gz 430 beta.tar.gz 431 gamma.tar.gz 432 433 and they need renamed to this 434 435 alpha.tgz 436 beta.tgz 437 gamma.tgz 438 439 Below is a possible implementation of a script to carry out the rename 440 (error cases have been omitted) 441 442 foreach my $old ( glob "*.tar.gz" ) 443 { 444 my $new = $old; 445 $new =~ s#(.*)\.tar\.gz$#$1.tgz# ; 446 447 rename $old => $new 448 or die "Cannot rename '$old' to '$new': $!\n; 449 } 450 451 Notice that a file glob pattern C<*.tar.gz> was used to match the 452 C<.tar.gz> files, then a fairly similar regular expression was used in 453 the substitute to allow the new filename to be created. 454 455 Given that the file glob is just a cut-down regular expression and that it 456 has already done a lot of the hard work in pattern matching the filenames, 457 wouldn't it be handy to be able to use the patterns in the fileglob to 458 drive the new filename? 459 460 Well, that's I<exactly> what C<File::GlobMapper> does. 461 462 Here is same snippet of code rewritten using C<globmap> 463 464 for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' ) 465 { 466 my ($from, $to) = @$pair; 467 rename $from => $to 468 or die "Cannot rename '$old' to '$new': $!\n; 469 } 470 471 So how does it work? 472 473 Behind the scenes the C<globmap> function does a combination of a 474 file glob to match existing filenames followed by a substitute 475 to create the new filenames. 476 477 Notice how both parameters to C<globmap> are strings that are delimited by <>. 478 This is done to make them look more like file globs - it is just syntactic 479 sugar, but it can be handy when you want the strings to be visually 480 distinctive. The enclosing <> are optional, so you don't have to use them - in 481 fact the first thing globmap will do is remove these delimiters if they are 482 present. 483 484 The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>. 485 Once the enclosing "< ... >" is removed, this is passed (more or 486 less) unchanged to C<File::Glob> to carry out a file match. 487 488 Next the fileglob C<*.tar.gz> is transformed behind the scenes into a 489 full Perl regular expression, with the additional step of wrapping each 490 transformed wildcard metacharacter sequence in parenthesis. 491 492 In this case the input fileglob C<*.tar.gz> will be transformed into 493 this Perl regular expression 494 495 ([^/]*)\.tar\.gz 496 497 Wrapping with parenthesis allows the wildcard parts of the Input File 498 Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>, 499 the I<Output File Glob>. This parameter operates just like the replacement 500 part of a substitute command. The difference is that the C<#1> syntax 501 is used to reference sub-patterns matched in the input fileglob, rather 502 than the C<$1> syntax that is used with perl regular expressions. In 503 this case C<#1> is used to refer to the text matched by the C<*> in the 504 Input File Glob. This makes it easier to use this module where the 505 parameters to C<globmap> are typed at the command line. 506 507 The final step involves passing each filename matched by the C<*.tar.gz> 508 file glob through the derived Perl regular expression in turn and 509 expanding the output fileglob using it. 510 511 The end result of all this is a list of pairs of filenames. By default 512 that is what is returned by C<globmap>. In this example the data structure 513 returned will look like this 514 515 ( ['alpha.tar.gz' => 'alpha.tgz'], 516 ['beta.tar.gz' => 'beta.tgz' ], 517 ['gamma.tar.gz' => 'gamma.tgz'] 518 ) 519 520 521 Each pair is an array reference with two elements - namely the I<from> 522 filename, that C<File::Glob> has matched, and a I<to> filename that is 523 derived from the I<from> filename. 524 525 526 527 =head2 Limitations 528 529 C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to 530 solve all filename mapping operations. Under the hood C<File::Glob> (or for 531 older versions of Perl, C<File::BSDGlob>) is used to match the files, so you 532 will never have the flexibility of full Perl regular expression. 533 534 =head2 Input File Glob 535 536 The syntax for an Input FileGlob is identical to C<File::Glob>, except 537 for the following 538 539 =over 5 540 541 =item 1. 542 543 No nested {} 544 545 =item 2. 546 547 Whitespace does not delimit fileglobs. 548 549 =item 3. 550 551 The use of parenthesis can be used to capture parts of the input filename. 552 553 =item 4. 554 555 If an Input glob matches the same file more than once, only the first 556 will be used. 557 558 =back 559 560 The syntax 561 562 =over 5 563 564 =item B<~> 565 566 =item B<~user> 567 568 569 =item B<.> 570 571 Matches a literal '.'. 572 Equivalent to the Perl regular expression 573 574 \. 575 576 =item B<*> 577 578 Matches zero or more characters, except '/'. Equivalent to the Perl 579 regular expression 580 581 [^/]* 582 583 =item B<?> 584 585 Matches zero or one character, except '/'. Equivalent to the Perl 586 regular expression 587 588 [^/]? 589 590 =item B<\> 591 592 Backslash is used, as usual, to escape the next character. 593 594 =item B<[]> 595 596 Character class. 597 598 =item B<{,}> 599 600 Alternation 601 602 =item B<()> 603 604 Capturing parenthesis that work just like perl 605 606 =back 607 608 Any other character it taken literally. 609 610 =head2 Output File Glob 611 612 The Output File Glob is a normal string, with 2 glob-like features. 613 614 The first is the '*' metacharacter. This will be replaced by the complete 615 filename matched by the input file glob. So 616 617 *.c *.Z 618 619 The second is 620 621 Output FileGlobs take the 622 623 =over 5 624 625 =item "*" 626 627 The "*" character will be replaced with the complete input filename. 628 629 =item #1 630 631 Patterns of the form /#\d/ will be replaced with the 632 633 =back 634 635 =head2 Returned Data 636 637 638 =head1 EXAMPLES 639 640 =head2 A Rename script 641 642 Below is a simple "rename" script that uses C<globmap> to determine the 643 source and destination filenames. 644 645 use File::GlobMapper qw(globmap) ; 646 use File::Copy; 647 648 die "rename: Usage rename 'from' 'to'\n" 649 unless @ARGV == 2 ; 650 651 my $fromGlob = shift @ARGV; 652 my $toGlob = shift @ARGV; 653 654 my $pairs = globmap($fromGlob, $toGlob) 655 or die $File::GlobMapper::Error; 656 657 for my $pair (@$pairs) 658 { 659 my ($from, $to) = @$pair; 660 move $from => $to ; 661 } 662 663 664 665 Here is an example that renames all c files to cpp. 666 667 $ rename '*.c' '#1.cpp' 668 669 =head2 A few example globmaps 670 671 Below are a few examples of globmaps 672 673 To copy all your .c file to a backup directory 674 675 '</my/home/*.c>' '</my/backup/#1.c>' 676 677 If you want to compress all 678 679 '</my/home/*.[ch]>' '<*.gz>' 680 681 To uncompress 682 683 '</my/home/*.[ch].gz>' '</my/home/#1.#2>' 684 685 =head1 SEE ALSO 686 687 L<File::Glob|File::Glob> 688 689 =head1 AUTHOR 690 691 The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>. 692 693 =head1 COPYRIGHT AND LICENSE 694 695 Copyright (c) 2005 Paul Marquess. All rights reserved. 696 This program is free software; you can redistribute it and/or 697 modify it under the same terms as Perl itself.
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Tue Mar 17 22:47:18 2015 | Cross-referenced by PHPXref 0.7.1 |