File Coverage

blib/lib/File/Signature.pm
Criterion Covered Total %
statement 90 111 81.0
branch 35 52 67.3
condition 4 21 19.0
subroutine 15 26 57.6
pod 7 18 38.8
total 151 228 66.2


line stmt bran cond sub pod time code
1             # $Id: Signature.pm,v 1.9 2003/08/12 19:53:03 jeremy Exp $
2             package File::Signature;
3 9     9   288629 use strict;
  9         26  
  9         396  
4              
5 9     9   54 use vars qw( $VERSION );
  9         20  
  9         842  
6             $VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
7              
8 9     9   52 use Digest::MD5;
  9         21  
  9         4205  
9              
10             my @FIELDS = qw( digest ino mode uid gid size mtime pathname );
11             my @ERRFIELDS = qw( failure pathname errormsg );
12              
13             my %CONFIG = (
14             allow_relative_paths => 0,
15             stringify_separator => "\0",
16             use_digest_format => 'hex',
17             );
18              
19             sub configure {
20 0 0   0 1 0 _throw_exception( "argument required" ) unless @_;
21 0 0       0 _throw_exception( "odd argument list (not a hash?)" ) unless 0 == @_ % 2;
22 0         0 my %options = @_;
23              
24 0         0 my $i = 0;
25 0         0 for my $opt ( keys %options ) {
26 0 0       0 $CONFIG{ $opt } = $options{ $opt }, $i++ if exists $CONFIG{ $opt };
27             }
28 0 0       0 return $i unless wantarray;
29 0         0 return %CONFIG;
30             }
31              
32              
33              
34             ##
35             # Note:
36             # Stringified error objects start with the stringify_separator. That
37             # allows us to support binary digests. Prefixing both is the alternative.
38             #
39             use overload '""' => sub {
40 27 100   27   115 if ( $_[0]->error ) {
41 2         8 return join $CONFIG{ stringify_separator }, "", "ERROR", $_[0]->error;
42             } else {
43 25         47 return join $CONFIG{ stringify_separator }, @{ $_[0] }{ @FIELDS };
  25         546  
44             }
45 9     9   18599 };
  9         11995  
  9         97  
46              
47              
48              
49             sub new_from_string {
50 15     15 1 2207 my $class = shift;
51 15         26 my $string = shift;
52 15 100       47 _throw_exception( "argument required" ) unless defined $string;
53 14 100       41 _throw_exception( "argument was null" ) unless length $string;
54 13         27 my $self = { };
55              
56             # For convenience.
57 13         155 my $sep_regex = qr/\Q$CONFIG{ stringify_separator }\E/;
58              
59 13 100       125 if ($string =~ s/^${ sep_regex }ERROR$sep_regex//) {
60 3         14 my @fields = split /$sep_regex/, $string, scalar( @ERRFIELDS );
61 3 100       11 _throw_exception( "bad errobj string" ) unless @fields == @ERRFIELDS;
62 2         4 @{ $self }{ @ERRFIELDS } = @fields;
  2         7  
63             } else {
64 10         91 my @fields = split /$sep_regex/, $string, scalar( @FIELDS );
65 10 100       40 _throw_exception( "bad object string" ) unless @fields == @FIELDS;
66 9         18 @{ $self }{ @FIELDS } = @fields;
  9         83  
67             }
68 11         94 bless $self, $class;
69             }
70              
71              
72             sub new {
73 25     25 1 6819 my $class = shift;
74 25         51 my $pathname = shift;
75              
76 25 100       85 _throw_exception( "pathname required" ) unless defined $pathname;
77 24 100       70 _throw_exception( "pathname was null" ) unless length $pathname;
78              
79 23 100 66     194 unless ( $CONFIG{ allow_relative_paths } or 0 == index( $pathname, '/' ) ) {
80 14         121 require File::Spec;
81 14         563 $pathname = File::Spec->rel2abs( $pathname );
82             }
83              
84 23         677 my @stat = stat $pathname;
85 23 100       696 return __PACKAGE__->_bad_sig( "stat failure", $pathname, $! ) unless @stat;
86              
87 16 50       1482 open my $fh, "<", $pathname
88             or return __PACKAGE__->_bad_sig( "open failure", $pathname, $! );
89 16         29 my $digest = do {
90 16         39 local $_ = $CONFIG{ use_digest_format };
91 16 0 33     725 /hex/ and Digest::MD5->new->addfile($fh)->hexdigest
      0        
      33        
      0        
      0        
92             or /b64/ and Digest::MD5->new->addfile($fh)->b64digest
93             or /bin/ and Digest::MD5->new->addfile($fh)->digest
94             or Digest::MD5->new->addfile($fh)->digest;
95             };
96 16         239 close $fh;
97              
98 16         33 my $self = { };
99 16         50 @{ $self }{ @FIELDS } = ( $digest, @stat[1,2,4,5,7,9], $pathname );
  16         133  
100              
101 16         114 return bless $self, $class;
102             }
103              
104              
105             # This is a private constructor for bad signatures.
106             sub _bad_sig {
107 7     7   22 my $class = shift;
108 7         68 my ( $failure, $pathname, $error ) = @_;
109 7         36 my $self = {
110             failure => $failure,
111             pathname => $pathname,
112             errormsg => $error,
113             };
114 7         38 bless $self, $class;
115             }
116              
117              
118             ## Both normal signature objects and error objects will have a pathname.
119 2     2 0 13 sub pathname { shift->{ pathname } }
120              
121              
122             ##
123             # The following are reserved accessors. They are unimplemented because
124             # I'm still considering their possible value. I believe that providing
125             # this information to the user is probably outside this module's scope
126             # but it may be useful just the same. Out of these, I am most inclined
127             # to provide the failure() and errormsg() routines.
128 0     0 0 0 sub failure { _throw_exception( 'unimplemented' ) }
129 0     0 0 0 sub errormsg { _throw_exception( 'unimplemented' ) }
130 0     0 0 0 sub digest { _throw_exception( 'unimplemented' ) }
131 0     0 0 0 sub ino { _throw_exception( 'unimplemented' ) }
132 0     0 0 0 sub mode { _throw_exception( 'unimplemented' ) }
133 0     0 0 0 sub uid { _throw_exception( 'unimplemented' ) }
134 0     0 0 0 sub gid { _throw_exception( 'unimplemented' ) }
135 0     0 0 0 sub size { _throw_exception( 'unimplemented' ) }
136 0     0 0 0 sub mtime { _throw_exception( 'unimplemented' ) }
137             #
138             ##
139              
140              
141             sub error {
142 84     84 1 147 my $self = shift;
143 84 100       569 return undef unless exists $self->{ failure };
144 16 100       41 if (wantarray) {
145 3         15 ( @{ $self }{ @ERRFIELDS } );
  3         24  
146             } else {
147 13         22 my $msg = __PACKAGE__;
148 13         52 $msg .= " ERROR: $self->{ failure } on '$self->{ pathname }'";
149 13 50       57 $msg .= " ($self->{ errormsg })" if exists $self->{ errormsg };
150 13         90 return $msg;
151             }
152             }
153              
154              
155             # Currently, this sub doesn't make sense it exists to provide functionality
156             # that will only become necessary if I add an update() method.
157             sub was_error {
158 0     0 0 0 my $self = shift;
159 0 0 0     0 return 1 if exists $self->{ old_sig } and $self->{ old_sig }->error;
160 0         0 return 0;
161             }
162              
163              
164            
165             sub old_and_new {
166 21     21 1 9368 my $self = shift;
167 21         34 my $field = shift;
168              
169             # It doesn't make much sense to compare old and new error.
170 21 100       44 _throw_exception( "bad method call " . $self->error ) if ( $self->error );
171              
172 20 50       643 return undef unless grep $field, @FIELDS;
173 20         108 return ( $self->{ old_sig }{ $field }, $self->{ $field } );
174             }
175              
176              
177              
178             sub is_same {
179 2     2 1 3 my $self = shift;
180              
181             # It doesn't make much sense to see if an error has changed.
182 2 50       6 _throw_exception( "bad method call " . $self->error ) if ( $self->error );
183              
184 2         6 $self->_check_again;
185 2         6 return "$self" eq "$self->{old_sig}";
186             }
187              
188              
189              
190             sub changed {
191 9     9 1 1002016 my $self = shift;
192              
193             # It doesn't make much sense to see if an error has changed.
194 9 100       28 _throw_exception( "bad method call " . $self->error ) if ( $self->error );
195              
196 7         28 $self->_check_again;
197 7 100       30 return "$self" ne "$self->{old_sig}" unless wantarray;
198             # XXX is string eq good enough here?
199 1         2 grep { $self->{ $_ } ne $self->{ old_sig }{ $_ } } @FIELDS;
  8         29  
200             }
201              
202              
203              
204             ## Private instance methods
205              
206             # This will handle error objects too but it shouldn't get any, right now.
207             # The functionality is there in case I later decide to add an update()
208             # method to just update the signature rather than check it's state. For the
209             # time being, File::Signature->new($existing_sig->pathname) will have to do.
210             sub _check_again {
211 9     9   14 my $self = shift;
212              
213             # Duplicate our $self and key the duplicate by "old_sig".
214 9         40 $self->{ old_sig } = __PACKAGE__->new_from_string( "$self" );
215              
216             # Create a new signature with the same path name.
217 9         86 my $newsig = __PACKAGE__->new( $self->{ pathname } );
218              
219             # Clean our $self up by removing old fields.
220 9         41 delete $self->{ $_ } for grep { $_ ne 'pathname' } @FIELDS, @ERRFIELDS;
  99         236  
221              
222             # Copy the new signature into our $self one field at a time.
223 9 50       27 if ( $newsig->error ) {
224 0         0 $self->{ $_ } = $newsig->{ $_ } for ( @ERRFIELDS );
225             } else {
226 9         122 $self->{ $_ } = $newsig->{ $_ } for ( @FIELDS );
227             }
228             }
229            
230              
231              
232             sub _throw_exception {
233 9     9   19 my $msg = shift;
234 9         48 my $subroutine = (caller(1))[3];
235 9         58 require Carp;
236 9         1743 Carp::croak( "${subroutine}(): " . $msg );
237             }
238              
239              
240              
241              
242             1;
243             __END__
244             # Below is stub documentation for your module. You'd better edit it!
245              
246             =head1 NAME
247              
248             File::Signature - Detect changes to a file's content or attributes.
249              
250             =head1 SYNOPSIS
251              
252             use File::Signature;
253             my $sig = File::Signature->new('/some/file');
254            
255             # If you have a stringified signature stored in $string
256             # you can create a File::Signature object from it.
257             my $sig = File::Signature->new_from_string($string);
258              
259             if (my $err = $sig->error) {
260             warn $err, "\n";
261             }
262             # You can use a signature object to re-check the same file.
263             if ( $sig->is_same() ) { print "Ok. The signature is the same.\n" }
264             if ( $sig->changed() ) { print "Uh Oh! The signature has changed.\n" }
265              
266             my @digests = $sig->old_and_new('digest');
267             my @inodes = $sig->old_and_new('ino');
268             my @modes = $sig->old_and_new('mode');
269             my @uid = $sig->old_and_new('uid');
270             my @gid = $sig->old_and_new('gid');
271             my @mtime = $sig->old_and_new('mtime');
272              
273             # A slightly more worthwhile use...
274             my @fields = $sig->changed();
275             for my $field (@fields) {
276             printf "$field was: %s but changed to %s.\n",
277             $sig->old_and_new($field);
278             }
279              
280              
281             =head1 ABSTRACT
282              
283             This perl library uses perl5 objects to assist in determining whether a file's
284             contents or attributes have changed. It maintains several pieces of information
285             about the file: a digest (currently only MD5 is supported), its inode number,
286             its mode, the uid of its owner, the gid of its group owner, and its last
287             modification time. A File::Signature object is closely associated with a single
288             pathname. It provides a way to compare the state of a file over different
289             points in time; it isn't useful for comparing different files.
290              
291             =head1 DESCRIPTION
292              
293             This module provides a way to monitor files for changes. It implements an object
294             oriented interface to file "signatures." In the case of this module, a
295             signature includes an MD5 digest (other digests may be added later), the file's
296             size, its inode number, its mode, its owner's uid, its group's gid, and its
297             mtime. This information is associated with a file by the file's "pathname." The
298             pathname is considered to be the file's unique identifier. In reality, a file
299             may have more than one pathname, but this module doesn't recognize that. It
300             will simply treat two differing pathnames as two different files, even if they
301             refer to the same file.
302              
303             As this module checks whether a file changes over time, a minimal use of it
304             would include the time when the signature was created and a different
305             time when the signature is regenerated and compared with the previous one. The
306             amount of time between these checks is arbitrary. This module makes it
307             easy to save a signature object and then load it and check for consistency
308             at a later time, whether seconds or years have passed.
309              
310             =head2 CONSTRUCTORS
311              
312             =over 4
313              
314             =item new()
315              
316             This constructor requires a pathname argument. If one is not provided, it will
317             throw an exception (i.e. croak.) If the pathname cannot be stat()'d or if it
318             cannot be read, the object returned will hold an error accessible via the
319             error() instance method. The pathname should be absolute and, if it isn't it
320             well be resolved to an absolute pathname unless the "allow_relative_paths"
321             configure option is provided. See L</configure()> below.
322              
323             =item new_from_string()
324              
325             This constructor takes a single argument, a previously stringified signature
326             object, and returns a new signature object created from the string.
327              
328             =back
329              
330             =head2 INSTANCE METHODS
331              
332             =over 4
333              
334             =item error()
335              
336             If there was a non-fatal error when the object was constructed or when the last
337             check was performed, a signature error object is returned instead. This method
338             determines whether the object is an error object. It returns false if it isn't
339             and a true value if it is. The true value in that case will be a human readable
340             error message in scalar context or, in list context, list containing a
341             "failure" message, the pathname, and an optional system error message.
342              
343             =item is_same()
344              
345             Updates the signature and checks whether it is the same. It returns true if it
346             is and false if it isn't. It will throw an exception (i.e. croak) if the
347             current signature object reports an error.
348              
349             =item changed()
350              
351             Updates the signature and checks whether it is has changed. It returns true if
352             it has and false if it hasn't. It will throw an exception (i.e. croak) if the
353             current signature object reports an error.
354              
355             =item old_and_new()
356              
357             This method requires a fieldname to be passes as a string. It returns a
358             two-element list consisting of the previous and current value for the field
359             with the supplied fieldname. If the fieldname is not recognized, it will return
360             undef. This is used primarily to determine what has changed once a change has
361             been detected with is_same() or changed(). The currently accepted fieldnames
362             are any of qw( digest ino mode uid gid size mtime pathname ). Note that the old
363             and new pathname fields should always be the same.
364              
365             =back
366              
367             =head2 OTHER CLASS METHODS
368              
369             =over 4
370              
371             =item configure()
372              
373             This is used to configure special behavior for all instances. The options are
374             passes as hash where the keys are option names and the values are the desired
375             settings. The following keys are recognized:
376              
377             =over 4
378              
379             =item use_digest_format
380              
381             This key may be either 'bin', 'hex', or 'b64' and will determine whether the
382             digest will be stored as a binary string, a string of hexadecimals, or a
383             base64 encoded string. The default is 'hex', but that should not be relied upon
384             as it may change in the future. An unrecognized value will result in a binary
385             string representation just as if 'bin' had been the value. If a binary string
386             is used, the L</stringify_separator> should not be changed from "\0"!
387              
388             =item allow_relative_paths
389              
390             A true value for this key will result in relative paths being permitted as the
391             pathname for signature objects. Usually, when a relative pathname is given in
392             a call to the new() constructor, the absolute path is determined. This option
393             disables that behavior. Changing this option is NOT RECOMMENDED.
394              
395             =item stringify_separator
396              
397             This option changes the field separator that is used when a signature object is
398             stringified. By default this separator is a null ("\0"). It can be changed to
399             any string but the string used must never appear in any of the fields. This
400             includes the fields of signature error objects which sometimes contain system
401             generated error messages. For example, colons and forward slashes are bad
402             choices. Changing this option is NOT RECOMMENDED.
403              
404             =back
405              
406             =back
407              
408             =head2 EXCEPTIONS
409              
410             This is a list of all exceptions that thrown by File::Signature:
411              
412             =over 4
413              
414             =item "argument required"
415              
416             Thrown by configure() and new_from_string() when called with no
417             arguments.
418              
419             =item "odd argument list (not a hash?)"
420              
421             Thrown by configure() when called with an odd number of arguments. (It is to be
422             called with a hash.)
423              
424             =item "argument was null"
425              
426             Thrown by new_from_string() when called with an empty string.
427              
428             =item "bad errobj string"
429              
430             Thrown by new_from_string() when something that looks like a stringified
431             error object results in the wrong number of fields.
432              
433             =item "bad object string"
434              
435             Thrown by new_from_string() when something that looks like a stringified
436             signature object results in the wrong number of fields.
437              
438             =item "pathname required"
439              
440             Thrown by new() when called without an argument.
441              
442             =item "pathname was null"
443              
444             Thrown by new() when called without a null string as an argument.
445              
446             =item "bad method call"
447              
448             Thrown by is_same(), changed(), and old_and_new() when called on an error
449             object.
450              
451             =back
452              
453             =head2 EXPORT
454              
455             None.
456              
457             =head1 CHANGES
458              
459             $Log: Signature.pm,v $
460             Revision 1.9 2003/08/12 19:53:03 jeremy
461             Fixups to tests.
462              
463             Revision 1.8 2003/08/11 16:53:41 jeremy
464             Fixed bad POD.
465              
466             Revision 1.7 2003/06/13 03:58:32 jeremy
467             Bug fixes, doc updates, minor changes.
468              
469             Revision 1.6 2003/06/12 02:49:37 jeremy
470             _throw_exception() fixed. Additional error states handled in
471             constructors.
472              
473             Revision 1.5 2003/06/10 22:03:11 jeremy
474             POD updates.
475              
476             Revision 1.4 2003/06/08 12:59:25 jeremy
477             POD changes.
478              
479             Revision 1.3 2003/06/08 12:36:24 jeremy
480             More minor prepping for RCS.
481              
482             Revision 1.2 2003/06/08 12:33:38 jeremy
483             Minor touch-ups for RCS prepping.
484              
485              
486             =head1 SEE ALSO
487              
488             L<perlfunc/"stat">, L<MD5::Digest>, L<stat(2)>
489              
490             Mention other useful documentation such as the documentation of
491             related modules or operating system documentation (such as man pages
492             in UNIX), or any relevant external documentation such as RFCs or
493             standards.
494              
495             =head1 AUTHOR
496              
497             Jeremy Madea, E<lt>jdm@pobox.comE<gt>
498              
499             =head1 COPYRIGHT AND LICENSE
500              
501             Copyright (C) 2003 by Jeremy Madea. All rights reserved.
502              
503             This library is free software; you can redistribute it and/or modify
504             it under the same terms as Perl itself.
505              
506             =cut