File Coverage

blib/lib/FileHash/Base.pm
Criterion Covered Total %
statement 24 200 12.0
branch 0 90 0.0
condition 0 3 0.0
subroutine 8 30 26.6
pod 18 18 100.0
total 50 341 14.6


line stmt bran cond sub pod time code
1             #================================= Base.pm ===================================
2             # Filename: Base.pm
3             # Description: Generalized hash by full path of file information.
4             # Original Author: Dale M. Amon
5             # Revised by: $Author: amon $
6             # Date: $Date: 2008-08-28 23:35:28 $
7             # Version: $Revision: 1.10 $
8             # License: LGPL 2.1, Perl Artistic or BSD
9             #
10             #=============================================================================
11 1     1   2082 use strict;
  1         3  
  1         43  
12 1     1   974 use Fault::Logger;
  1         36647  
  1         37  
13 1     1   13 use Fault::DebugPrinter;
  1         9  
  1         24  
14 1     1   5 use File::Spec;
  1         2  
  1         25  
15 1     1   864 use FileHash::Entry;
  1         4  
  1         33  
16 1     1   10 use FileHash::FormatString;
  1         2  
  1         20  
17 1     1   5 use Cwd qw(abs_path);
  1         2  
  1         66  
18              
19             package FileHash::Base;
20 1     1   5 use vars qw{@ISA};
  1         9  
  1         2835  
21             @ISA = qw( UNIVERSAL );
22              
23             #=============================================================================
24             # INTERNAL OPS
25             #=============================================================================
26              
27             sub _leaf {
28 0     0     my ($self,$dev,$directory,$file) = @_;
29 0           my $entry = FileHash::Entry->alloc;
30 0           my $path = File::Spec->catfile($dev,$directory,$file);
31              
32 0           return $entry->initFromStat ($path);
33             }
34              
35             #-----------------------------------------------------------------------------
36              
37             sub _branch {
38 0     0     my ($self,$dev,$directory,$file,$depth) = @_;
39 0           my $path = File::Spec->catfile($dev,$directory,$file);
40            
41 0 0 0       if (-d "$path" and ! -l "$path") {
42 0           my ($fh,$new);
43              
44 0 0         Fault::Logger->assertion_check
45             (!(opendir $fh, "$path"),undef,"Can not open '$path': $!")
46             or return $self;
47              
48             # Readdir returns a null list if there are no files.
49 0           $depth++;
50 0           my @dirlist = readdir ($fh);
51 0           closedir $fh;
52            
53 0           foreach $new (@dirlist) {
54 0 0         next if ($new eq ".");
55 0 0         next if ($new eq "..");
56 0           $self->_branch ($dev,File::Spec->catfile($directory,$file),$new,$depth);
57             }
58 0           $depth--;
59             }
60             else {
61 0           my $entry = $self->_leaf ($dev,$directory,$file);
62 0 0         defined $entry or return undef;
63              
64 0           $self->_store ($self->_genKey ($entry),$entry);
65             }
66 0           return $self;
67             }
68              
69             #=============================================================================
70              
71             sub _store {
72 0     0     my ($self,$key,$val) = @_;
73              
74 0 0         if (! exists $self->{'filehash'}->{$key}) {
75 0           $self->{'filehash'}->{$key} = [$val];
76 0           Fault::DebugPrinter->dbg (2,"NEW KEY <$key>");
77             }
78             else {
79 0           Fault::DebugPrinter->dbg (2,"DUPLICATE KEY <$key>");
80 0           push @{$self->{'filehash'}->{$key}}, $val;
  0            
81             }
82 0           my $path = $val->path;
83 0           Fault::DebugPrinter->dbg (3," FILE <$path>");
84 0           return $self;
85             }
86              
87             #-----------------------------------------------------------------------------
88              
89             sub _genKey
90 0     0     {Fault::Logger->crash ("Subclass must impliment: _genKey");}
91              
92             #=============================================================================
93             # CLASS METHODS
94             #=============================================================================
95              
96             sub alloc ($) {
97 0     0 1   my ($class) = @_;
98 0           my $self = bless {}, $class;
99 0           $self->{'filehash'} = {};
100 0           return $self;
101             }
102              
103             #=============================================================================
104             # INSTANCE METHODS
105             #=============================================================================
106             # Init methods
107             #-----------------------------------------------------------------------------
108 0     0 1   sub init ($) {shift;}
109 0     0 1   sub initFromLines ($$@) {shift->addFromLines (@_);}
110 0     0 1   sub initFromFile ($$$) {shift->addFromFile (@_);}
111 0     0 1   sub initFromTree ($$) {shift->addFromTree (@_);}
112 0     0 1   sub initFromObject ($$) {shift->addFromObject (@_);}
113 0     0 1   sub initFromDump ($$) {shift->addFromDump (@_);}
114              
115             #=============================================================================
116             # add methods
117             #-----------------------------------------------------------------------------
118             # The FileHash::Entry object will check for invalid values in @lines.
119              
120             sub addFromLines ($$@) {
121 0     0 1   my ($self,$formatline,@lines) = @_;
122              
123 0 0         if ($::DEBUG) {
124 0 0         Fault::Logger->arg_check_noref ($formatline,"formatline") or return undef;
125             }
126              
127 0           my $fmt = FileHash::FormatString->alloc;
128 0 0         $fmt->init ($formatline) or return undef;
129              
130 0           $self->{'format'} = $fmt;
131 0           foreach (@lines) {
132 0 0         next if (/^\w*$/);
133              
134 0           my $entry = FileHash::Entry->alloc;
135 0 0         $entry->initFromLine ($self->{'format'},$_) or next;
136              
137 0           $self->_store ($self->_genKey ($entry),$entry);
138             }
139 0           return $self;
140             }
141              
142             #-----------------------------------------------------------------------------
143              
144             sub addFromFile ($$$) {
145 0     0 1   my ($self,$formatline,$listfname) = @_;
146 0           my $fh;
147              
148 0 0         if ($::DEBUG) {
149 0 0         Fault::Logger->arg_check_noref ($formatline,"formatline") or return undef;
150 0 0         Fault::Logger->arg_check_noref ($listfname, "listfname" ) or return undef;
151             }
152              
153             Fault::Logger->assertion_check
154 0 0         (!(open $fh, "<$listfname"),undef,"Can not open '$listfname': $!")
155             or return undef;
156              
157             # Readline returns a null list if there are no lines.
158 0           my @lines = readline $fh;
159              
160 0           return $self->addFromLines ($formatline,@lines);
161             }
162              
163             #-----------------------------------------------------------------------------
164              
165             sub addFromTree ($$) {
166 0     0 1   my ($self,$path) = @_;
167              
168 0 0         if ($::DEBUG) {
169 0 0         Fault::Logger->arg_check_noref ($path,"path") or return undef;
170             }
171              
172 0           my ($dev,$directories,$file) =
173             File::Spec->splitpath(Cwd::abs_path($path));
174            
175 0           $self->_branch ($dev,$directories,$file, 0);
176 0           return $self;
177             }
178              
179             #-----------------------------------------------------------------------------
180              
181             sub addFromObject ($$) {
182 0     0 1   my ($self,$old) = @_;
183              
184 0 0         if ($::DEBUG) {
185 0 0         Fault::Logger->arg_check_isa ($old,"FileHash::Base","oldfilehash")
186             or return undef;
187             }
188              
189 0           foreach my $i (values %{$old->{'filehash'}}) {
  0            
190 0           foreach my $j (@$i) {
191 0           $self->_store ($self->_genKey ($j),$j);
192             }
193             }
194 0           return $self;
195             }
196              
197             #-----------------------------------------------------------------------------
198              
199             sub addFromDump ($$) {
200 0     0 1   my ($self,$path) = @_;
201 0           my ($fh,$entry,$s,$vers) = undef;
202              
203 0 0         if ($::DEBUG) {
204 0 0         Fault::Logger->arg_check_noref ($path,"path") or return undef;
205             }
206              
207             Fault::Logger->assertion_check
208 0 0         (!(open $fh, "<$path"),undef,"Can not open '$path': $!")
209             or return undef;
210              
211 0           $_ = (<$fh>);
212 0 0         if (/^Version:/) {($s,$vers) = split;}
  0            
213             else {
214 0           Fault::Logger->log
215             ("No Format version number found in report file header: '$path'");
216 0           close $fh;
217 0           return undef;
218             }
219              
220 0           while (1) {
221 0           my $entry = FileHash::Entry->alloc;
222 0 0         $entry->addFromDump($fh) or last;
223              
224 0           $self->_store ($self->_genKey ($entry),$entry);
225             }
226 0           close $fh;
227 0           return $self;
228             }
229              
230             #=============================================================================
231             # Unary operators
232             #=============================================================================
233              
234             sub identical ($) {
235 0     0 1   my ($fha) = @_;
236 0           my $class = ref $fha;
237 0           my ($j,$k,$v1);
238            
239 0           my $fhb = $class->alloc;
240 0 0         $fhb->init or return undef;
241            
242 0           my $a = $fha->{'filehash'};
243 0           foreach $v1 (values %$a) {
244 0 0         if ($#$v1 > 0) {
245 0           foreach $j (@$v1) {$fhb->_store ($fhb->_genKey ($j),$j);}
  0            
246             }
247             }
248 0           return $fhb;
249             }
250              
251             #-----------------------------------------------------------------------------
252              
253             sub unique ($) {
254 0     0 1   my ($fha) = @_;
255 0           my $class = ref $fha;
256 0           my ($j,$k,$v1);
257            
258 0           my $fhb = $class->alloc;
259 0 0         $fhb->init or return undef;
260            
261 0           my $a = $fha->{'filehash'};
262 0           foreach $v1 (values %$a) {
263 0 0         if ($#$v1 == 0) {
264 0           foreach $j (@$v1) {$fhb->_store ($fhb->_genKey ($j),$j);}
  0            
265             }
266             }
267 0           return $fhb;
268             }
269              
270             #=============================================================================
271             # Binary operators
272             #=============================================================================
273              
274             sub xor ($$) {
275 0     0 1   my ($fha,$fhb) = @_;
276 0           my $class = ref $fha;
277 0           my ($j, $k,$v1,$v2);
278              
279 0 0         if ($::DEBUG) {
280 0 0         Fault::Logger->arg_check_isa ($fhb,$class,"fhb") or return undef;
281             }
282              
283 0           my $fhc = $class->alloc;
284 0 0         $fhc->init or return undef;
285              
286 0           my ($a,$b) = ($fha->{'filehash'},$fhb->{'filehash'});
287 0           while (($k,$v1) = each %$a) {
288 0 0         if (!exists $b->{$k}) {
289 0           foreach $j (@$v1) {$fhc->_store ($fhc->_genKey ($j),$j);}
  0            
290             }
291             }
292 0           while (($k,$v2) = each %$b) {
293 0 0         if (!exists $a->{$k}) {
294 0           foreach $j (@$v2) {$fhc->_store ($fhc->_genKey ($j),$j);}
  0            
295             }
296             }
297 0           return $fhc;
298             }
299              
300             #-----------------------------------------------------------------------------
301              
302             sub and ($$) {
303 0     0 1   my ($fha,$fhb) = @_;
304 0           my $class = ref $fha;
305 0           my ($j,$k,$v1,$v2);
306              
307 0 0         if ($::DEBUG) {
308 0 0         Fault::Logger->arg_check_isa ($fhb,$class,"fhb") or return undef;
309             }
310              
311 0           my $fhc = $class->alloc;
312 0 0         $fhc->init or return undef;
313              
314 0           my ($a,$b) = ($fha->{'filehash'},$fhb->{'filehash'});
315 0           while (($k,$v1) = each %$a) {
316 0 0         if (exists $b->{$k}) {
317 0           my $v2 = $b->{$k};
318 0           foreach $j (@$v1) {$fhc->_store ($fhc->_genKey ($j),$j);}
  0            
319 0           foreach $j (@$v2) {$fhc->_store ($fhc->_genKey ($j),$j);}
  0            
320             }
321             }
322 0           return $fhc;
323             }
324              
325             #-----------------------------------------------------------------------------
326              
327             sub andnot ($$) {
328 0     0 1   my ($fha,$fhb) = @_;
329 0           my $class = ref $fha;
330 0           my ($j,$k,$v1,$v2);
331              
332 0 0         if ($::DEBUG) {
333 0 0         Fault::Logger->arg_check_isa ($fhb,$class,"fhb") or return undef;
334             }
335              
336 0           my $fhc = $class->alloc;
337 0 0         $fhc->init or return undef;
338              
339 0           my ($a,$b) = ($fha->{'filehash'},$fhb->{'filehash'});
340 0           while (($k,$v1) = each %$a) {
341 0 0         if (!exists $b->{$k}) {
342 0           foreach $j (@$v1) {$fhc->_store ($fhc->_genKey ($j),$j);}
  0            
343             }
344             }
345 0           return $fhc;
346             }
347              
348             #=============================================================================
349              
350             sub dump ($$) {
351 0     0 1   my ($self,$dumpfile) = @_;
352 0           my $fh;
353              
354 0 0         if ($::DEBUG) {
355 0 0         Fault::Logger->arg_check_noref ($dumpfile,"dumpfile") or return undef;
356             }
357              
358             Fault::Logger->assertion_check
359 0 0         (!(open $fh, ">$dumpfile"),undef,"Can not open '$dumpfile': $!")
360             or return undef;
361              
362 0 0         if (!printf $fh "Version: " . FileHash::Entry->dumpversion . "\n") {
363 0           Fault::Logger->log ("Failed to print dumpfile header: $!");
364 0           close $fh;
365 0           return undef;
366             }
367              
368 0           foreach my $i (values %{$self->{'filehash'}}) {
  0            
369 0           foreach my $j (@$i) {$j->fprint ($fh);}
  0            
370             }
371              
372 0           close $fh;
373 0           return $self;
374             }
375            
376             #=============================================================================
377             # POD DOCUMENTATION
378             #=============================================================================
379             # You may extract and format the documention section with the 'perldoc' cmd.
380              
381             =head1 NAME
382              
383             FileHash::Base - Abstract superclass for FileHashes.
384              
385             =head1 SYNOPSIS
386              
387             use FileHash;
388             $obj = FileHash::Base->alloc;
389              
390             $obj = $obj->init;
391             $obj = $obj->initFromLines ($formatline,@lines);
392             $obj = $obj->initFromFile ($formatline,$datafilepath);
393             $obj = $obj->initFromTree ($rootdir);
394             $obj = $obj->initFromObject ($obj2);
395             $obj = $obj->initFromDump ($path);
396              
397             $obj = $obj->addFromLines ($formatline,@lines);
398             $obj = $obj->addFromFile ($formatline,$datafilepath);
399             $obj = $obj->addFromTree ($rootdir);
400             $obj = $obj->addFromObject ($obj2);
401             $obj = $obj->addFromDump ($path);
402              
403             $fhb = $fha->identical;
404             $fhb = $fha->unique;
405              
406             $fhc = $fha->and ($fhb);
407             $fhc = $fha->andnot ($fhb);
408             $fhc = $fha->xor ($fhb);
409              
410             $obj = $obj->dump ($dumpfile);
411              
412             =head1 Inheritance
413              
414             UNIVERSAL
415              
416             =head1 Description
417              
418             This is an abstract superclass for containers of lists of file metadata.
419             It is not directly useable and will execute a Fault if you attempt it.
420              
421             FileHash::Name and FileHash::Content inherit most of their behavior from
422             here with the exception of hash key selection.
423              
424             =head1 Examples
425              
426             See subclasses.
427              
428             =head1 Class Variables
429              
430             None.
431              
432             =head1 Instance Variables
433              
434             filehash Pointer to a hash of arrays of FileHash::Entry objects
435             which contain all the file metadata discovered found
436             when the FileHash object was initialized. Entries for files
437             with identical keys hash into the same array, making for
438             a very efficient sort.
439              
440             =head1 Class Methods
441              
442             =over 4
443              
444             =item B<$obj = FileHash::Base-Ealloc>
445              
446             Allocate an empty instance of FileHash::Base. This is for inheritance
447             only and should not be used. Subclasses could override but there is
448             probably no reason to do so unless they add ivars. None do at present.
449              
450             =head1 Instance Methods
451              
452             Unless otherwise specified, instance methods return self on success and
453             undef on failure.
454              
455             =over 4
456              
457             =item B<$fhc = $fha-Eand ($fhb)>
458              
459             Create a file hash containing the groups of files found in both
460             filehash a and b.
461              
462             a and b must be of the same FileHash subclass and the newly created
463             c will be off that type also.
464              
465             =item B<$fhc = $fha-Eandnot ($fhb)>
466              
467             Create a file hash containing the groups of files found in filehash
468             a but not in filehash b.
469              
470             a and b must be of the same FileHash subclass and the newly created
471             c will be off that type also.
472              
473             If you want not a and b, just reverse the args; not a and not b is
474             obviously nonsensical as we are testing keys of a against keys of
475             b.
476              
477             =item B<$obj = $obj-EaddFromDump ($dumpfile)>
478              
479             Use a dump file to recreate hash entries and add them to a FileHash
480             object.
481              
482             The first line of the file must contain the text:
483              
484             Version: x.yy
485              
486             =item B<$obj = $obj-EaddFromFile ($format,$datafilepath)>
487              
488             Use the format line to create a FileHash::FormatString object. The
489             format object is used to parse each of the lines in a file which
490             contains lines of text data. Each line in the file is assumed to
491             contain data about one file which is to be added to the FileHash.
492              
493             =item B<$obj = $obj-EaddFromLines ($format,@lines)>
494              
495             Use the format line to create a FileHash:FormatString object. The
496             format object is used to parse each of the lines in a list. Each
497             line contains data about one file which is to be added to the FileHash.
498              
499             =item B<$obj = $obj-EaddFromObject ($obj2)>
500              
501             Add data to a filehash from another FileHash, $obj2. This is useful for
502             merging two objects. The subclasses need not be the same because the
503             Entries are inserted by re-hashing into the target object.
504              
505             my $a = FileHash::Name->alloc;
506             $a->initFromTree ("/root");
507             $a->addFromTree ("/home/me");
508              
509             =item B<$obj = $obj-EaddFromTree ($rootdir)>
510              
511             Entries are added to the hash via a recursive descent through a directory
512             tree. Each file is a 'leaf node' and is represented by an array record in
513             the hash. If two files have the same hash key, the are likely identical
514             so the records for them are placed together in an array under that hask key.
515              
516             =item B<$obj = $obj-Edump ($dumpfile)>
517              
518             Dump FileHash::Entry objects sequentially, one to a line, to the specified
519             filename.
520              
521             The first line of the file contains the FileHash::Entry dump file format
522             version number in this format:
523              
524             Version: x.yy
525              
526             =item B<$fhb = $fha-Eidentical>
527              
528             Return a FileHash containing the contents of hash keys which have more
529             than one member. If the keys are md5,length this represents all files
530             with the same content; if they keys are name it represents all files
531             with the same name.
532              
533             =item B<$obj = $obj-Einit>
534              
535             A noop at present. If you need an empty object, use this after alloc
536             to make sure that if init is needed in the future, it will be carried
537             out.
538              
539             =item B<$obj = $obj-EinitFromDump ($dumpfile)>
540              
541             Use a dump file to recreate hash entries in a freshly alloc'd FileHash
542             object.
543              
544             The first line of the file must contain the text:
545              
546             Version: x.yy
547              
548             =item B<$obj = $obj-EinitFromFile ($format,$datafilepath)>
549              
550             Initialize a freshly alloc'd FileHash. It uses the format line
551             to init a FileHash::FormatString object. The format object is used to parse
552             each of the lines in a file which contains lines of text data. Each line
553             in the file is assumed to contain data about one file.
554              
555             =item B<$obj = $obj-EinitFromLines ($format,@lines)>
556              
557             Initialize a freshly formatted FileHash. It uses the format line
558             to create a FileHash:FormatString object. The format object is used to parse
559             each of the lines in a list. Each line contains data about one file.
560              
561             =item B<$obj = $obj-EinitFromObject ($obj2)>
562              
563             Initialize the newly alloc'd object using data from another FileHash, $obj2.
564             This is useful for changing from hashing by name to hashing by content or
565             vice versa:
566              
567             my $a = FileHash::Name->alloc;
568             my $b = FileHash::Content->alloc;
569             $a->initFromTree ("/root");
570             $b->initFromObject ($a);
571              
572             =item B<$obj = $obj-EinitFromTree ($rootdir)>
573              
574             Initialize a freshly alloc'd FileHash. The hash is filled via a
575             recursive descent through a directory tree. Each file is a
576             'leaf node' and is represented by an array record in the hash. If
577             two files have the same hash key, the are likely identical so the records
578             for them are placed together in an array under that hask key.
579              
580             =item B<$fhb = $fha-Eunique>
581              
582             Return a FileHash containing the contents of hash keys which have only
583             one member. These are files for which no other file has the same content
584             if the key is md5,length; or the same name if the key is the name.
585              
586             =item B<$fhc = $fha-Exor ($fhb)>
587              
588             Create a filehash c which contains all of the groups of files which
589             are only in fha or fhb but not both.
590              
591             a and b must be of the same FileHash subclass and the newly created
592             c will be off that type also.
593              
594             =back 4
595              
596             =head1 Private Class Method
597              
598             None.
599              
600             =head1 Private Instance Methods
601              
602             =item B<$key = $obj-E_genKey($entry)>
603              
604             Create an appropriate hash key. Each subclass must override this
605             stub method as it does nothing except print a warning message
606             and crash the program.
607              
608             =head1 Errors and Warnings
609              
610             Lots.
611              
612             =head1 KNOWN BUGS
613              
614             See TODO.
615              
616             =head1 SEE ALSO
617              
618             File::Spec, Cwd, FileHash::Entry, FileHash::FormatString, Fault::Logger.
619              
620             =head1 AUTHOR
621              
622             Dale Amon
623              
624             =cut
625            
626             #=============================================================================
627             # CVS HISTORY
628             #=============================================================================
629             # $Log: Base.pm,v $
630             # Revision 1.10 2008-08-28 23:35:28 amon
631             # perldoc section regularization.
632             #
633             # Revision 1.9 2008-08-09 20:25:13 amon
634             # Documentation error fixed.
635             #
636             # Revision 1.8 2008-08-09 12:56:01 amon
637             # Used wrong method name. Fixed
638             #
639             # Revision 1.7 2008-08-04 12:12:20 amon
640             # Added unary and binary ops; made init methods synonums for add methods.
641             #
642             # Revision 1.6 2008-07-27 15:16:17 amon
643             # Wrote lexical parse for Entry; error checking on eval and other minor issues.
644             #
645             # Revision 1.5 2008-07-25 14:30:42 amon
646             # Documentation improvements and corrections.
647             #
648             # Revision 1.4 2008-07-24 20:19:43 amon
649             # Just in case I missed anything.
650             #
651             # Revision 1.3 2008-07-24 13:35:26 amon
652             # switch to NeXT style alloc/init format for FileHash and Entry classes.
653             #
654             # Revision 1.2 2008-07-23 21:12:24 amon
655             # Moved notes out of file headers; a few doc updates; added assertion checks;
656             # minor bug fixes.
657             #
658             # 20080722 Dale Amon
659             # Renamed FileHash.pm to Base.pm so it is FileHash::Base.
660             # 20080717 Dale Amon
661             # Split FilenameHash, formerly Directory class, into FileHash
662             # FileHash::Name and FileHash::Content.
663             # 20080625 Dale Amon
664             # Created.
665             1;