[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/se3-unattended/var/se3/unattended/install/linuxaux/opt/perl/lib/5.10.0/i586-linux-thread-multi/File/ -> GlobMapper.pm (source)

   1  package File::GlobMapper;
   2  
   3  use strict;
   4  use warnings;
   5  use Carp;
   6  
   7  our ($CSH_GLOB);
   8  
   9  BEGIN
  10  {
  11      if ($] < 5.006)
  12      { 
  13          require File::BSDGlob; import File::BSDGlob qw(:glob) ;
  14          $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
  15          *globber = \&File::BSDGlob::csh_glob;
  16      }  
  17      else
  18      { 
  19          require File::Glob; import File::Glob qw(:glob) ;
  20          $CSH_GLOB = File::Glob::GLOB_CSH() ;
  21          #*globber = \&File::Glob::bsd_glob;
  22          *globber = \&File::Glob::csh_glob;
  23      }  
  24  }
  25  
  26  our ($Error);
  27  
  28  our ($VERSION, @EXPORT_OK);
  29  $VERSION = '0.000_02';
  30  @EXPORT_OK = qw( globmap );
  31  
  32  
  33  our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
  34  $noPreBS = '(?<!\\\)' ; # no preceeding backslash
  35  $metachars = '.*?[](){}';
  36  $matchMetaRE = '[' . quotemeta($metachars) . ']';
  37  
  38  %mapping = (
  39                  '*' => '([^/]*)',
  40                  '?' => '([^/])',
  41                  '.' => '\.',
  42                  '[' => '([',
  43                  '(' => '(',
  44                  ')' => ')',
  45             );
  46  
  47  %wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;           
  48  
  49  sub globmap ($$;)
  50  {
  51      my $inputGlob = shift ;
  52      my $outputGlob = shift ;
  53  
  54      my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
  55          or croak "globmap: $Error" ;
  56      return $obj->getFileMap();
  57  }
  58  
  59  sub new
  60  {
  61      my $class = shift ;
  62      my $inputGlob = shift ;
  63      my $outputGlob = shift ;
  64      # TODO -- flags needs to default to whatever File::Glob does
  65      my $flags = shift || $CSH_GLOB ;
  66      #my $flags = shift ;
  67  
  68      $inputGlob =~ s/^\s*\<\s*//;
  69      $inputGlob =~ s/\s*\>\s*$//;
  70  
  71      $outputGlob =~ s/^\s*\<\s*//;
  72      $outputGlob =~ s/\s*\>\s*$//;
  73  
  74      my %object =
  75              (   InputGlob   => $inputGlob,
  76                  OutputGlob  => $outputGlob,
  77                  GlobFlags   => $flags,
  78                  Braces      => 0,
  79                  WildCount   => 0,
  80                  Pairs       => [],
  81                  Sigil       => '#',
  82              );
  83  
  84      my $self = bless \%object, ref($class) || $class ;
  85  
  86      $self->_parseInputGlob()
  87          or return undef ;
  88  
  89      $self->_parseOutputGlob()
  90          or return undef ;
  91      
  92      my @inputFiles = globber($self->{InputGlob}, $flags) ;
  93  
  94      if (GLOB_ERROR)
  95      {
  96          $Error = $!;
  97          return undef ;
  98      }
  99  
 100      #if (whatever)
 101      {
 102          my $missing = grep { ! -e $_ } @inputFiles ;
 103  
 104          if ($missing)
 105          {
 106              $Error = "$missing input files do not exist";
 107              return undef ;
 108          }
 109      }
 110  
 111      $self->{InputFiles} = \@inputFiles ;
 112  
 113      $self->_getFiles()
 114          or return undef ;
 115  
 116      return $self;
 117  }
 118  
 119  sub _retError
 120  {
 121      my $string = shift ;
 122      $Error = "$string in input fileglob" ;
 123      return undef ;
 124  }
 125  
 126  sub _unmatched
 127  {
 128      my $delimeter = shift ;
 129  
 130      _retError("Unmatched $delimeter");
 131      return undef ;
 132  }
 133  
 134  sub _parseBit
 135  {
 136      my $self = shift ;
 137  
 138      my $string = shift ;
 139  
 140      my $out = '';
 141      my $depth = 0 ;
 142  
 143      while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
 144      {
 145          $out .= quotemeta($1) ;
 146          $out .= $mapping{$2} if defined $mapping{$2};
 147  
 148          ++ $self->{WildCount} if $wildCount{$2} ;
 149  
 150          if ($2 eq ',')
 151          { 
 152              return _unmatched "("
 153                  if $depth ;
 154              
 155              $out .= '|';
 156          }
 157          elsif ($2 eq '(')
 158          { 
 159              ++ $depth ;
 160          }
 161          elsif ($2 eq ')')
 162          { 
 163              return _unmatched ")"
 164                  if ! $depth ;
 165  
 166              -- $depth ;
 167          }
 168          elsif ($2 eq '[')
 169          {
 170              # TODO -- quotemeta & check no '/'
 171              # TODO -- check for \]  & other \ within the []
 172              $string =~ s#(.*?\])##
 173                  or return _unmatched "[" ;
 174              $out .= "$1)" ;
 175          }
 176          elsif ($2 eq ']')
 177          {
 178              return _unmatched "]" ;
 179          }
 180          elsif ($2 eq '{' || $2 eq '}')
 181          {
 182              return _retError "Nested {} not allowed" ;
 183          }
 184      }
 185  
 186      $out .= quotemeta $string;
 187  
 188      return _unmatched "("
 189          if $depth ;
 190  
 191      return $out ;
 192  }
 193  
 194  sub _parseInputGlob
 195  {
 196      my $self = shift ;
 197  
 198      my $string = $self->{InputGlob} ;
 199      my $inGlob = '';
 200  
 201      # Multiple concatenated *'s don't make sense
 202      #$string =~ s#\*\*+#*# ;
 203  
 204      # TODO -- Allow space to delimit patterns?
 205      #my @strings = split /\s+/, $string ;
 206      #for my $str (@strings)
 207      my $out = '';
 208      my $depth = 0 ;
 209  
 210      while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
 211      {
 212          $out .= quotemeta($1) ;
 213          $out .= $mapping{$2} if defined $mapping{$2};
 214          ++ $self->{WildCount} if $wildCount{$2} ;
 215  
 216          if ($2 eq '(')
 217          { 
 218              ++ $depth ;
 219          }
 220          elsif ($2 eq ')')
 221          { 
 222              return _unmatched ")"
 223                  if ! $depth ;
 224  
 225              -- $depth ;
 226          }
 227          elsif ($2 eq '[')
 228          {
 229              # TODO -- quotemeta & check no '/' or '(' or ')'
 230              # TODO -- check for \]  & other \ within the []
 231              $string =~ s#(.*?\])##
 232                  or return _unmatched "[";
 233              $out .= "$1)" ;
 234          }
 235          elsif ($2 eq ']')
 236          {
 237              return _unmatched "]" ;
 238          }
 239          elsif ($2 eq '}')
 240          {
 241              return _unmatched "}" ;
 242          }
 243          elsif ($2 eq '{')
 244          {
 245              # TODO -- check no '/' within the {}
 246              # TODO -- check for \}  & other \ within the {}
 247  
 248              my $tmp ;
 249              unless ( $string =~ s/(.*?)$noPreBS\}//)
 250              {
 251                  return _unmatched "{";
 252              }
 253              #$string =~ s#(.*?)\}##;
 254  
 255              #my $alt = join '|', 
 256              #          map { quotemeta $_ } 
 257              #          split "$noPreBS,", $1 ;
 258              my $alt = $self->_parseBit($1);
 259              defined $alt or return 0 ;
 260              $out .= "($alt)" ;
 261  
 262              ++ $self->{Braces} ;
 263          }
 264      }
 265  
 266      return _unmatched "("
 267          if $depth ;
 268  
 269      $out .= quotemeta $string ;
 270  
 271  
 272      $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
 273      $self->{InputPattern} = $out ;
 274  
 275      #print "# INPUT '$self->{InputGlob}' => '$out'\n";
 276  
 277      return 1 ;
 278  
 279  }
 280  
 281  sub _parseOutputGlob
 282  {
 283      my $self = shift ;
 284  
 285      my $string = $self->{OutputGlob} ;
 286      my $maxwild = $self->{WildCount};
 287  
 288      if ($self->{GlobFlags} & GLOB_TILDE)
 289      #if (1)
 290      {
 291          $string =~ s{
 292                ^ ~             # find a leading tilde
 293                (               # save this in $1
 294                    [^/]        # a non-slash character
 295                          *     # repeated 0 or more times (0 means me)
 296                )
 297              }{
 298                $1
 299                    ? (getpwnam($1))[7]
 300                    : ( $ENV{HOME} || $ENV{LOGDIR} )
 301              }ex;
 302  
 303      }
 304  
 305      # max #1 must be == to max no of '*' in input
 306      while ( $string =~ m/#(\d)/g )
 307      {
 308          croak "Max wild is #$maxwild, you tried #$1"
 309              if $1 > $maxwild ;
 310      }
 311  
 312      my $noPreBS = '(?<!\\\)' ; # no preceeding backslash
 313      #warn "noPreBS = '$noPreBS'\n";
 314  
 315      #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
 316      $string =~ s/$noPreBS}#(\d)/\${$1}/g;
 317      $string =~ s#$noPreBS}\*#\$inFile}#g;
 318      $string = '"' . $string . '"';
 319  
 320      #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
 321      $self->{OutputPattern} = $string ;
 322  
 323      return 1 ;
 324  }
 325  
 326  sub _getFiles
 327  {
 328      my $self = shift ;
 329  
 330      my %outInMapping = ();
 331      my %inFiles = () ;
 332  
 333      foreach my $inFile (@{ $self->{InputFiles} })
 334      {
 335          next if $inFiles{$inFile} ++ ;
 336  
 337          my $outFile = $inFile ;
 338  
 339          if ( $inFile =~ m/$self->{InputPattern}/ )
 340          {
 341              no warnings 'uninitialized';
 342              eval "\$outFile = $self->{OutputPattern};" ;
 343  
 344              if (defined $outInMapping{$outFile})
 345              {
 346                  $Error =  "multiple input files map to one output file";
 347                  return undef ;
 348              }
 349              $outInMapping{$outFile} = $inFile;
 350              push @{ $self->{Pairs} }, [$inFile, $outFile];
 351          }
 352      }
 353  
 354      return 1 ;
 355  }
 356  
 357  sub getFileMap
 358  {
 359      my $self = shift ;
 360  
 361      return $self->{Pairs} ;
 362  }
 363  
 364  sub getHash
 365  {
 366      my $self = shift ;
 367  
 368      return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
 369  }
 370  
 371  1;
 372  
 373  __END__
 374  
 375  =head1 NAME
 376  
 377  File::GlobMapper - Extend File Glob to Allow Input and Output Files
 378  
 379  =head1 SYNOPSIS
 380  
 381      use File::GlobMapper qw( globmap );
 382  
 383      my $aref = globmap $input => $output
 384          or die $File::GlobMapper::Error ;
 385  
 386      my $gm = new File::GlobMapper $input => $output
 387          or die $File::GlobMapper::Error ;
 388  
 389  
 390  =head1 DESCRIPTION
 391  
 392  B<WARNING Alpha Release Alert!> 
 393  
 394  =over 5
 395  
 396  =item * This code is a work in progress. 
 397  
 398  =item * There are known bugs. 
 399  
 400  =item * The interface defined here is tentative. 
 401  
 402  =item * There are portability issues. 
 403  
 404  =item * Do not use in production code.
 405  
 406  =item * Consider yourself warned!
 407  
 408  =back
 409  
 410  This module needs Perl5.005 or better.
 411  
 412  This module takes the existing C<File::Glob> module as a starting point and
 413  extends it to allow new filenames to be derived from the files matched by
 414  C<File::Glob>.
 415  
 416  This can be useful when carrying out batch operations on multiple files that
 417  have both an input filename and output filename and the output file can be
 418  derived from the input filename. Examples of operations where this can be
 419  useful include, file renaming, file copying and file compression.
 420  
 421  
 422  =head2 Behind The Scenes
 423  
 424  To help explain what C<File::GlobMapper> does, consider what code you
 425  would write if you wanted to rename all files in the current directory
 426  that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
 427  current directory
 428  
 429      alpha.tar.gz
 430      beta.tar.gz
 431      gamma.tar.gz
 432  
 433  and they need renamed to this
 434  
 435      alpha.tgz
 436      beta.tgz
 437      gamma.tgz
 438  
 439  Below is a possible implementation of a script to carry out the rename
 440  (error cases have been omitted)
 441  
 442      foreach my $old ( glob "*.tar.gz" )
 443      {
 444          my $new = $old;
 445          $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
 446  
 447          rename $old => $new 
 448              or die "Cannot rename '$old' to '$new': $!\n;
 449      }
 450  
 451  Notice that a file glob pattern C<*.tar.gz> was used to match the
 452  C<.tar.gz> files, then a fairly similar regular expression was used in
 453  the substitute to allow the new filename to be created.
 454  
 455  Given that the file glob is just a cut-down regular expression and that it
 456  has already done a lot of the hard work in pattern matching the filenames,
 457  wouldn't it be handy to be able to use the patterns in the fileglob to
 458  drive the new filename?
 459  
 460  Well, that's I<exactly> what C<File::GlobMapper> does. 
 461  
 462  Here is same snippet of code rewritten using C<globmap>
 463  
 464      for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
 465      {
 466          my ($from, $to) = @$pair;
 467          rename $from => $to 
 468              or die "Cannot rename '$old' to '$new': $!\n;
 469      }
 470  
 471  So how does it work?
 472  
 473  Behind the scenes the C<globmap> function does a combination of a
 474  file glob to match existing filenames followed by a substitute
 475  to create the new filenames. 
 476  
 477  Notice how both parameters to C<globmap> are strings that are delimited by <>.
 478  This is done to make them look more like file globs - it is just syntactic
 479  sugar, but it can be handy when you want the strings to be visually
 480  distinctive. The enclosing <> are optional, so you don't have to use them - in
 481  fact the first thing globmap will do is remove these delimiters if they are
 482  present.
 483  
 484  The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>. 
 485  Once the enclosing "< ... >" is removed, this is passed (more or
 486  less) unchanged to C<File::Glob> to carry out a file match.
 487  
 488  Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
 489  full Perl regular expression, with the additional step of wrapping each
 490  transformed wildcard metacharacter sequence in parenthesis.
 491  
 492  In this case the input fileglob C<*.tar.gz> will be transformed into
 493  this Perl regular expression 
 494  
 495      ([^/]*)\.tar\.gz
 496  
 497  Wrapping with parenthesis allows the wildcard parts of the Input File
 498  Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
 499  the I<Output File Glob>. This parameter operates just like the replacement
 500  part of a substitute command. The difference is that the C<#1> syntax
 501  is used to reference sub-patterns matched in the input fileglob, rather
 502  than the C<$1> syntax that is used with perl regular expressions. In
 503  this case C<#1> is used to refer to the text matched by the C<*> in the
 504  Input File Glob. This makes it easier to use this module where the
 505  parameters to C<globmap> are typed at the command line.
 506  
 507  The final step involves passing each filename matched by the C<*.tar.gz>
 508  file glob through the derived Perl regular expression in turn and
 509  expanding the output fileglob using it.
 510  
 511  The end result of all this is a list of pairs of filenames. By default
 512  that is what is returned by C<globmap>. In this example the data structure
 513  returned will look like this
 514  
 515       ( ['alpha.tar.gz' => 'alpha.tgz'],
 516         ['beta.tar.gz'  => 'beta.tgz' ],
 517         ['gamma.tar.gz' => 'gamma.tgz']
 518       )
 519  
 520  
 521  Each pair is an array reference with two elements - namely the I<from>
 522  filename, that C<File::Glob> has matched, and a I<to> filename that is
 523  derived from the I<from> filename.
 524  
 525  
 526  
 527  =head2 Limitations
 528  
 529  C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
 530  solve all filename mapping operations. Under the hood C<File::Glob> (or for
 531  older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
 532  will never have the flexibility of full Perl regular expression.
 533  
 534  =head2 Input File Glob
 535  
 536  The syntax for an Input FileGlob is identical to C<File::Glob>, except
 537  for the following
 538  
 539  =over 5
 540  
 541  =item 1.
 542  
 543  No nested {}
 544  
 545  =item 2.
 546  
 547  Whitespace does not delimit fileglobs.
 548  
 549  =item 3.
 550  
 551  The use of parenthesis can be used to capture parts of the input filename.
 552  
 553  =item 4.
 554  
 555  If an Input glob matches the same file more than once, only the first
 556  will be used.
 557  
 558  =back
 559  
 560  The syntax
 561  
 562  =over 5
 563  
 564  =item B<~>
 565  
 566  =item B<~user>
 567  
 568  
 569  =item B<.>
 570  
 571  Matches a literal '.'.
 572  Equivalent to the Perl regular expression
 573  
 574      \.
 575  
 576  =item B<*>
 577  
 578  Matches zero or more characters, except '/'. Equivalent to the Perl
 579  regular expression
 580  
 581      [^/]*
 582  
 583  =item B<?>
 584  
 585  Matches zero or one character, except '/'. Equivalent to the Perl
 586  regular expression
 587  
 588      [^/]?
 589  
 590  =item B<\>
 591  
 592  Backslash is used, as usual, to escape the next character.
 593  
 594  =item  B<[]>
 595  
 596  Character class.
 597  
 598  =item  B<{,}>
 599  
 600  Alternation
 601  
 602  =item  B<()>
 603  
 604  Capturing parenthesis that work just like perl
 605  
 606  =back
 607  
 608  Any other character it taken literally.
 609  
 610  =head2 Output File Glob
 611  
 612  The Output File Glob is a normal string, with 2 glob-like features.
 613  
 614  The first is the '*' metacharacter. This will be replaced by the complete
 615  filename matched by the input file glob. So
 616  
 617      *.c *.Z
 618  
 619  The second is     
 620  
 621  Output FileGlobs take the 
 622  
 623  =over 5
 624  
 625  =item "*"
 626  
 627  The "*" character will be replaced with the complete input filename.
 628  
 629  =item #1
 630  
 631  Patterns of the form /#\d/ will be replaced with the 
 632  
 633  =back
 634  
 635  =head2 Returned Data
 636  
 637  
 638  =head1 EXAMPLES
 639  
 640  =head2 A Rename script
 641  
 642  Below is a simple "rename" script that uses C<globmap> to determine the
 643  source and destination filenames.
 644  
 645      use File::GlobMapper qw(globmap) ;
 646      use File::Copy;
 647  
 648      die "rename: Usage rename 'from' 'to'\n"
 649          unless @ARGV == 2 ;
 650  
 651      my $fromGlob = shift @ARGV;
 652      my $toGlob   = shift @ARGV;
 653  
 654      my $pairs = globmap($fromGlob, $toGlob)
 655          or die $File::GlobMapper::Error;
 656  
 657      for my $pair (@$pairs)
 658      {
 659          my ($from, $to) = @$pair;
 660          move $from => $to ;
 661      }
 662  
 663  
 664  
 665  Here is an example that renames all c files to cpp.
 666      
 667      $ rename '*.c' '#1.cpp'
 668  
 669  =head2 A few example globmaps
 670  
 671  Below are a few examples of globmaps
 672  
 673  To copy all your .c file to a backup directory
 674  
 675      '</my/home/*.c>'    '</my/backup/#1.c>'
 676  
 677  If you want to compress all    
 678  
 679      '</my/home/*.[ch]>'    '<*.gz>'
 680  
 681  To uncompress
 682  
 683      '</my/home/*.[ch].gz>'    '</my/home/#1.#2>'
 684  
 685  =head1 SEE ALSO
 686  
 687  L<File::Glob|File::Glob>
 688  
 689  =head1 AUTHOR
 690  
 691  The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.
 692  
 693  =head1 COPYRIGHT AND LICENSE
 694  
 695  Copyright (c) 2005 Paul Marquess. All rights reserved.
 696  This program is free software; you can redistribute it and/or
 697  modify it under the same terms as Perl itself.


Generated: Tue Mar 17 22:47:18 2015 Cross-referenced by PHPXref 0.7.1