File Coverage

blib/lib/Logfile/Tail.pm
Criterion Covered Total %
statement 296 296 100.0
branch 122 122 100.0
condition 26 26 100.0
subroutine 36 36 100.0
pod 6 6 100.0
total 486 486 100.0


line stmt bran cond sub pod time code
1              
2             package Logfile::Tail;
3              
4             =head1 NAME
5              
6             Logfile::Tail - read log files
7              
8             =head1 SYNOPSIS
9              
10             use Logfile::Tail ();
11             my $file = new Logfile::Tail('/var/log/messages');
12             while (<$file>) {
13             # process the line
14             }
15              
16             and later in different process
17              
18             my $file = new Logfile::Tail('/var/log/messages');
19              
20             and continue reading where we've left out the last time. Also possible
21             is to explicitly save the current position:
22              
23             my $file = new Logfile::Tail('/var/log/messages',
24             { autocommit => 0 });
25             my $line = $file->getline();
26             $file->commit();
27              
28             =cut
29              
30 4     4   110747 use strict;
  4         9  
  4         158  
31 4     4   22 use warnings FATAL => 'all';
  4         8  
  4         204  
32              
33             our $VERSION = '0.7';
34              
35 4     4   3398 use Symbol ();
  4         3959  
  4         96  
36 4     4   14690 use IO::File ();
  4         45918  
  4         86  
37 4     4   5786 use Digest::SHA ();
  4         860756  
  4         128  
38 4     4   37 use File::Spec ();
  4         6  
  4         99  
39 4     4   20 use Fcntl qw( O_RDWR O_CREAT );
  4         7  
  4         338  
40 4     4   21 use Cwd ();
  4         6  
  4         18058  
41              
42             sub new {
43 40     40 1 111686 my $class = shift;
44              
45 40         232 my $self = Symbol::gensym();
46 40         735 bless $self, $class;
47 40         1164 tie *$self, $self;
48              
49 40 100       183 if (@_) {
50 39 100       192 $self->open(@_) or return;
51             }
52              
53 32         187 return $self;
54             }
55              
56             my $STATUS_SUBDIR = '.logfile-tail-status';
57             my $CHECK_LENGTH = 512;
58             sub open {
59 39     39 1 65 my $self = shift;
60              
61 39         71 my $filename = shift;
62 39 100 100     267 if (@_ and ref $_[-1] eq 'HASH') {
63 7         27 *$self->{opts} = pop @_;
64             }
65 39 100       193 if (not exists *$self->{opts}{autocommit}) {
66 35         147 *$self->{opts}{autocommit} = 1;
67             }
68              
69 39         163 my ($archive, $offset, $checksum) = $self->_load_data_from_status($filename);
70 39 100       131 return unless defined $offset;
71              
72 35         83 my $need_commit = *$self->{opts}{autocommit};
73 35 100       86 if (not defined $checksum) {
74 11         18 $need_commit = 1;
75             }
76              
77 35 100       188 my ($fh, $content) = $self->_open(defined $archive ? $filename . $archive : $filename, $offset);
78 35 100 100     462 if (not defined $fh) {
    100          
    100          
79 4 100       21 if (not defined $archive) {
80 2         25 return;
81             }
82 2         13 my ($older_fh, $older_archive, $older_content) = $self->_get_archive($archive, 'older', $offset, $checksum);
83 2 100       12 if (defined $older_fh) {
84 1         2 $fh = $older_fh;
85 1         2 $content = $older_content;
86 1         2 $archive = $older_archive;
87             } else {
88 1         5 return;
89             }
90             } elsif (not defined $checksum) {
91 9         40 $content = $self->_seek_to($fh, 0);
92             } elsif (not defined $content
93             or $checksum ne Digest::SHA::sha256_hex($content)) {
94 9         37 my ($older_fh, $older_archive, $older_content) = $self->_get_archive($archive, 'older', $offset, $checksum);
95 9 100       29 if (defined $older_fh) {
96 5         22 $fh->close();
97 5         67 $fh = $older_fh;
98 5         13 $content = $older_content;
99 5         10 $archive = $older_archive;
100             } else {
101 4         15 $content = $self->_seek_to($fh, 0);
102             }
103             }
104              
105 32         60 my $layers = $_[0];
106 32 100 100     137 if (defined $layers and $layers =~ /<:/) {
107 2         11 $layers =~ s!<:!<:scalar:!;
108             } else {
109 30         82 $layers = '<:scalar';
110             }
111              
112 32         60 my $buffer = '';
113 32         97 *$self->{int_buffer} = \$buffer;
114 32         49 my $int_fh;
115 32     3   48 eval { open $int_fh, $layers, *$self->{int_buffer} };
  32     2   588  
  3         42  
  3         20  
  3         43  
  2         1716  
  2         5  
  2         13  
116 32 100       27681 if ($@) {
117 1         10 warn "$@\n";
118 1         26 return;
119             };
120 31         80 *$self->{int_fh} = $int_fh;
121              
122 31         57 *$self->{_fh} = $fh;
123 31         85 *$self->{data_array} = [ $content ];
124 31         99 *$self->{data_length} = length $content;
125 31         123 *$self->{archive} = $archive;
126              
127 31 100       82 if ($need_commit) {
128 28         88 $self->commit();
129             }
130 31         1298 1;
131             }
132              
133             sub _open {
134 75     75   142 my ($self, $filename, $offset) = @_;
135 75 100       396 my $fh = new IO::File or return;
136 73 100       2250 $fh->open($filename, '<:raw') or return;
137              
138 65 100       3888 if ($offset > 0) {
139 33         125 my $content = $self->_seek_to($fh, $offset);
140 33         91 return ($fh, $content);
141             }
142 32         95 return ($fh, '');
143             }
144              
145             sub _fh {
146 2067     2067   2267 *{$_[0]}->{_fh};
  2067         4135  
147             }
148              
149             sub _seek_to {
150 46     46   161 my ($self, $fh, $offset) = @_;
151              
152 46         82 my $offset_start = $offset - $CHECK_LENGTH;
153 46 100       116 $offset_start = 0 if $offset_start < 0;
154              
155             # no point in checking the return value, seek will
156             # go beyond the end of the file anyway
157 46         272 $fh->seek($offset_start, 0);
158              
159 46         413 my $buffer = '';
160 46         124 while ($offset - $offset_start > 0) {
161 36         172 my $read = $fh->read($buffer, $offset - $offset_start, length($buffer));
162             # $read is not defined for example when we try to read directory
163 36 100 100     759 last if not defined $read or $read <= 0;
164 28         79 $offset_start += $read;
165             }
166 46 100       97 if ($offset_start == $offset) {
167 38         92 return $buffer;
168             } else {
169 8         16 return;
170             }
171             }
172              
173             sub _load_data_from_status {
174 39     39   80 my ($self, $log_filename) = @_;
175 39         2120 my $abs_filename = Cwd::abs_path($log_filename);
176 39 100       129 if (not defined $abs_filename) {
177             # can we access the file at all?
178 1         17 warn "Cannot access file [$log_filename]\n";
179 1         9 return;
180             }
181 38         872 my @abs_stat = stat $abs_filename;
182 38 100 100     758 if (defined $abs_stat[1] and (stat $log_filename)[1] == $abs_stat[1]) {
183 35         71 $log_filename = $abs_filename;
184             }
185              
186 38         152 *$self->{filename} = $log_filename;
187              
188 38         97 my $status_filename = *$self->{opts}{status_file};
189 38 100       99 if (not defined $status_filename) {
190 35         501 $status_filename = Digest::SHA::sha256_hex($log_filename);
191             }
192 38         194 my $status_dir = *$self->{opts}{status_dir};
193 38 100       122 if (not defined $status_dir) {
    100          
194 35         77 $status_dir = $STATUS_SUBDIR;
195             } elsif ($status_dir eq '') {
196 1         2 $status_dir = '.';
197             }
198 38 100       486 if (not -d $status_dir) {
199 4         298 mkdir $status_dir, 0775;
200             }
201 38         753 my $status_path = File::Spec->catfile($status_dir, $status_filename);
202 38         341 my $status_fh = new IO::File $status_path, O_RDWR | O_CREAT;
203 38 100       183452 if (not defined $status_fh) {
204 1         12 warn "Error reading/creating status file [$status_path]\n";
205 1         9 return;
206             }
207 37         115 *$self->{status_fh} = $status_fh;
208              
209 37         636 my $status_line = <$status_fh>;
210 37         93 my ($offset, $checksum, $archive_filename) = (0, undef, undef);
211 37 100       139 if (defined $status_line) {
212 26 100       269 if (not $status_line =~ /^File \[(.+?)\] (?:archive \[(.+)\] )?offset \[(\d+)\] checksum \[([0-9a-z]+)\]\n/) {
213 1         10 warn "Status file [$status_path] has bad format\n";
214 1         8 return;
215             }
216 25         74 my $check_filename = $1;
217 25         51 $archive_filename = $2;
218 25         63 $offset = $3;
219 25         102 $checksum = $4;
220 25 100       77 if ($check_filename ne $log_filename) {
221 1         22 warn "Status file [$status_path] is for file [$check_filename] while expected [$log_filename]\n";
222 1         11 return;
223             }
224             }
225              
226 35         168 return ($archive_filename, $offset, $checksum);
227             }
228              
229             sub _save_offset_to_status {
230 62     62   106 my ($self, $offset) = @_;
231 62         113 my $log_filename = *$self->{filename};
232 62         94 my $status_fh = *$self->{status_fh};
233 62         153 my $checksum = $self->_get_current_checksum();
234 62         967 $status_fh->seek(0, 0);
235 62 100       750 my $archive_text = defined *$self->{archive} ? " archive [@{[ *$self->{archive} ]}]" : '';
  15         74  
236 62         377 $status_fh->printflush("File [$log_filename]$archive_text offset [$offset] checksum [$checksum]\n");
237 62         4977 $status_fh->truncate($status_fh->tell);
238             }
239              
240             sub _push_to_data {
241 2056     2056   2275 my $self = shift;
242 2056         2338 my $chunk = shift;
243 2056 100       4475 if (length($chunk) >= $CHECK_LENGTH) {
244 1         5 *$self->{data_array} = [ substr $chunk, -$CHECK_LENGTH ];
245 1         2 *$self->{data_length} = $CHECK_LENGTH;
246 1         2 return;
247             }
248 2055         2726 my $data = *$self->{data_array};
249 2055         2922 my $data_length = *$self->{data_length};
250 2055         3027 push @$data, $chunk;
251 2055         2314 $data_length += length($chunk);
252 2055         5092 while ($data_length - length($data->[0]) >= $CHECK_LENGTH) {
253 1895         2445 $data_length -= length($data->[0]);
254 1895         4887 shift @$data;
255             }
256 2055         4114 *$self->{data_length} = $data_length;
257             }
258              
259             sub _get_current_checksum {
260 66     66   123 my $self = shift;
261 66         108 my $data_length = *$self->{data_length};
262 66         152 my $data = *$self->{data_array};
263 66         75 my $i = 0;
264 66         434 my $digest = new Digest::SHA('sha256');
265 66 100       1469 if ($data_length > $CHECK_LENGTH) {
266 1         20 $digest->add(substr($data->[0], $data_length - $CHECK_LENGTH));
267 1         2 $i++;
268             }
269 66         183 for (; $i <= $#$data; $i++) {
270 136         579 $digest->add($data->[$i]);
271             }
272 66         667 return $digest->hexdigest();
273             }
274              
275             sub _get_archive {
276 36     36   162 my ($self, $start, $older_newer, $offset, $checksum) = @_;
277 36         98 my @types = ( '-', '.' );
278 36         45 my $start_num;
279 36 100       82 if (defined $start) {
280 27         89 @types = substr($start, 0, 1);
281 27         52 $start_num = substr($start, 1);
282             }
283 36         78 my $filename = *$self->{filename};
284 36         87 for my $t (@types) {
285 43         87 my $srt;
286 43 100       98 if ($t eq '.') {
287 30 100       61 if ($older_newer eq 'newer') {
288 17     98   73 $srt = sub { $_[1] <=> $_[0] };
  98         378  
289             } else {
290 13     34   67 $srt = sub { $_[0] <=> $_[1] };
  34         245  
291             }
292             } else {
293 13 100       29 if ($older_newer eq 'newer') {
294 4     14   24 $srt = sub { $_[0] cmp $_[1] };
  14         66  
295             } else {
296 9     2   53 $srt = sub { $_[1] cmp $_[0] };
  2         8  
297             }
298             }
299 50         134 my @archives = map { "$t$_" } # make it a suffix
  36         59  
300 125 100       416 sort { $srt->($a, $b) } # sort properly
301 125         390 grep { not defined $start_num or $srt->($_, $start_num) == 1} # only newer / older
302 125         281 grep { /^[0-9]+$/ } # only numerical suffixes
303 43         6521 map { substr($_, length($filename) + 1) } # only get the numerical suffixes
304             glob "$filename$t*"; # we look at file.1, file.2 or file-20091231, ...
305 43 100 100     612 if ($older_newer eq 'newer' and -f $filename) {
306 17         38 push @archives, '';
307             }
308 43         123 for my $a (@archives) {
309 40   100     301 my ($fh, $content) = $self->_open($filename . $a, ($offset || 0));
310 40 100       610 if (not defined $fh) {
311 6         22 next;
312             }
313 34 100       63 if (defined $checksum) {
314 14 100 100     134 if (defined $content
315             and $checksum eq Digest::SHA::sha256_hex($content)) {
316 9         69 return ($fh, $a, $content);
317             }
318             } else {
319 20 100       149 return ($fh, ($a eq '' ? undef : $a), $content);
320             }
321 5         20 $fh->close();
322             }
323             }
324 7         24 return;
325             }
326              
327             sub _close_status {
328 31     31   53 my ($self, $offset) = @_;
329 31         82 my $status_fh = delete *$self->{status_fh};
330 31 100       159 $status_fh->close() if defined $status_fh;
331             }
332              
333             sub _getline {
334 2067     2067   2495 my $self = shift;
335 2067         3331 my $fh = $self->_fh;
336 2067 100       3889 if (defined $fh) {
337 2066         2926 my $buffer_ref = *$self->{int_buffer};
338 2089         2465 DO_GETLINE:
339             my $ret = undef;
340 2089         48301 $$buffer_ref = $fh->getline();
341 2089 100       61112 if (not defined $$buffer_ref) {
342             # we are at the end of the current file
343             # we need to check if the file was rotated
344             # in the meantime
345 33         474 my @fh_stat = stat($fh);
346 33         84 my $filename = *$self->{filename};
347 33 100       983 my @file_stat = stat($filename . ( defined *$self->{archive} ? *$self->{archive} : '' ));
348 33 100 100     383 if (not @file_stat or "@fh_stat[0, 1]" ne "@file_stat[0, 1]") {
    100          
349             # our file was rotated, or generally
350             # is no longer where it was when
351             # we started to read
352 4         29 my ($older_fh, $older_archive, $older_content)
353             = $self->_get_archive(*$self->{archive}, 'older', $fh->tell, $self->_get_current_checksum);
354 4 100       17 if (not defined $older_fh) {
355             # we have lost the file / sync
356 1         5 return;
357             }
358 3         148 *$self->{_fh}->close();
359 3         55 *$self->{_fh} = $fh = $older_fh;
360 3         17 *$self->{data_array} = [ $older_content ];
361 3         8 *$self->{data_length} = length $older_content;
362 3         8 *$self->{archive} = $older_archive;
363 3         59 goto DO_GETLINE;
364             } elsif (defined *$self->{archive}) {
365             # our file was not rotated
366             # however, if our file is in fact
367             # a rotate file, we should go to the
368             # next one
369 21         85 my ($newer_fh, $newer_archive) = $self->_get_archive(*$self->{archive}, 'newer');
370 21 100       61 if (not defined $newer_fh) {
371 1         4 return;
372             }
373 20         79 *$self->{_fh}->close();
374 20         303 *$self->{_fh} = $fh = $newer_fh;
375 20         132 *$self->{data_array} = [ '' ];
376 20         43 *$self->{data_length} = 0;
377 20         29 *$self->{archive} = $newer_archive;
378 20         288 goto DO_GETLINE;
379             }
380 8         33 return;
381             }
382 2056         4606 $self->_push_to_data($$buffer_ref);
383 2056         4317 seek(*$self->{int_fh}, 0, 0);
384 2056         49187 my $line = *$self->{int_fh}->getline();
385 2056         58165 return $line;
386             } else {
387 1         3 return undef;
388             }
389             }
390              
391             sub getline {
392 43     43 1 1353 my $self = shift;
393 43         130 my $ret = $self->_getline();
394 4     4   38 no warnings 'uninitialized';
  4         9  
  4         578  
395 43 100       144 if (*$self->{opts}{autocommit} == 2) {
396 1         3 $self->commit();
397             }
398 43         283 return $ret;
399             }
400              
401             sub getlines {
402 7     7 1 21 my $self = shift;
403 7         14 my @out;
404 7         10 while (1) {
405 2024         4484 my $l = $self->_getline();
406 2024 100       4688 if (not defined $l) {
407 7         17 last;
408             }
409 2017         3493 push @out, $l;
410             }
411 4     4   66 no warnings 'uninitialized';
  4         7  
  4         1546  
412 7 100       36 if (*$self->{opts}{autocommit} == 2) {
413 1         4 $self->commit();
414             }
415 7         460 @out;
416             }
417              
418             sub commit {
419 62     62 1 95 my $self = shift;
420 62         107 my $fh = *$self->{_fh};
421 62         243 my $offset = $fh->tell;
422 62         1831 $self->_save_offset_to_status($offset);
423             }
424              
425             sub close {
426 31     31 1 3621 my $self = shift;
427 31 100       157 if (*$self->{opts}{autocommit}) {
428 27         60 $self->commit();
429             }
430 31         1309 $self->_close_status();
431 31         701 my $fh = delete *$self->{_fh};
432 31 100       159 $fh->close() if defined $fh;
433             }
434              
435             sub TIEHANDLE() {
436 41 100   41   1342 if (ref $_[0]) {
437             # if we already have object, probably called from new(),
438             # just return that
439 40         138 return $_[0];
440             } else {
441 1         2 my $class = shift;
442 1         5 return $class->new(@_);
443             }
444             }
445              
446             sub READLINE() {
447 29 100   29   27624 goto &getlines if wantarray;
448 24         118 goto &getline;
449             }
450              
451             sub CLOSE() {
452 1     1   2 my $self = shift;
453 1         4 $self->close();
454             }
455              
456             sub DESTROY() {
457 39     39   5017 my $self = shift;
458 39 100       668 $self->close() if defined *$self->{_fh};
459             }
460              
461             1;
462              
463             =head1 DESCRIPTION
464              
465             Log files are files that are generated by various running programs.
466             They are generally only appended to. When parsing information from
467             log files, it is important to only read each record / line once,
468             both for performance and for accounting and statistics reasons.
469              
470             The C provides an easy way to achieve the
471             read-just-once processing of log files.
472              
473             The module remembers for each file the position where it left
474             out the last time, in external status file, and upon next invocation
475             it seeks to the remembered position. It also stores checksum
476             of 512 bytes before that position, and if the checksum does not
477             match the file content the next time it is read, it will try to
478             find the rotated file and read the end of it before advancing to
479             newer rotated file or to the current log file.
480              
481             Both .num and -date suffixed rotated files are supported.
482              
483             =head1 METHODS
484              
485             =over 4
486              
487             =item new()
488              
489             =item new( FILENAME [,MODE [,PERMS]], [ { attributes } ] )
490              
491             =item new( FILENAME, IOLAYERS, [ { attributes } ] )
492              
493             Constructor, creates new C object. Like C,
494             it passes any parameters to method C; it actually creates
495             an C handle internally.
496              
497             Returns new object, or undef upon error.
498              
499             =item open( FILENAME [,MODE [,PERMS]], [ { attributes } ] )
500              
501             =item open( FILENAME, IOLAYERS, [ { attributes } ] )
502              
503             Opens the file using C. If the file was read before, the
504             offset where the reading left out the last time is read from an
505             external file in the ./.logfile-tail-status directory and seek is
506             made to that offset, to continue reading at the last remembered
507             position.
508              
509             If however checksum, which is also stored with the offset, does not
510             match the current content of the file (512 bytes before the offset
511             are checked), the module assumes that the file was rotated / reused
512             / truncated in the mean time since the last read. It will try to
513             find the checksum among the rotated files. If no match is found,
514             it will reset the offset to zero and start from the beginning of
515             the file.
516              
517             Returns true, or undef upon error.
518              
519             The attributes are passed as an optional hashref of key => value
520             pairs. The supported attribute is
521              
522             =over 4
523              
524             =item autocommit
525              
526             Value 0 means that no saving takes place; you need to save explicitly
527             using the commit() method.
528              
529             Value 1 (the default) means that position is saved when the object is
530             closed via explicit close() call, or when it is destroyed. The value
531             is also saved upon the first open.
532              
533             Value 2 causes the position to be save in all cases as value 1,
534             plus after each successful read.
535              
536             =item status_dir
537              
538             The attribute specifies the directory (or subdirectory of current
539             directory) which is used to hold status files. By default,
540             ./.logfile-tail-status directory is used. To store the status
541             files in the current directory, pass empty string or dot (.).
542              
543             =item status_file
544              
545             The attribute specifies the name of the status file which is used to
546             hold the offset and SHA256 checksum of 512 bytes before the offset.
547             By default, SHA256 of the full (absolute) logfile filename is used
548             as the status file name.
549              
550             =back
551              
552             =item commit()
553              
554             Explicitly save the current position and checksum in the status file.
555              
556             Returns true, or undef upon error.
557              
558             =item close()
559              
560             Closes the internal filehandle. It stores the current position
561             and checksum in an external file in the ./.logfile-tail-status
562             directory.
563              
564             Returns true, or undef upon error.
565              
566             =item getline()
567              
568             Line <$fh> in scalar context.
569              
570             =item getlines()
571              
572             Line <$fh> in list context.
573              
574             =back
575              
576             =head1 AUTHOR AND LICENSE
577              
578             Copyright (c) 2010 Jan Pazdziora.
579              
580             Logfile::Tail is free software. You can redistribute it and/or modify
581             it under the terms of either:
582              
583             a) the GNU General Public License, version 2 or 3;
584              
585             b) the Artistic License, either the original or version 2.0.
586