File Coverage

blib/lib/File/Locate/Iterator.pm
Criterion Covered Total %
statement 80 91 87.9
branch 53 66 80.3
condition 2 3 66.6
subroutine 10 12 83.3
pod 3 3 100.0
total 148 175 84.5


line stmt bran cond sub pod time code
1             # Copyright 2009, 2010, 2011, 2012, 2013, 2014, 2017, 2018, 2019 Kevin Ryde.
2             #
3             # This file is part of File-Locate-Iterator.
4             #
5             # File-Locate-Iterator is free software; you can redistribute it and/or
6             # modify it under the terms of the GNU General Public License as published
7             # by the Free Software Foundation; either version 3, or (at your option)
8             # any later version.
9             #
10             # File-Locate-Iterator is distributed in the hope that it will be useful,
11             # but WITHOUT ANY WARRANTY; without even the implied warranty of
12             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13             # Public License for more details.
14             #
15             # You should have received a copy of the GNU General Public License along
16             # with File-Locate-Iterator; see the file COPYING. Failing that, go to
17             # .
18              
19              
20             # Maybe:
21             # ignore_case globs and suffixes, not easily done to regexs
22             # glob_ignore_case
23             # suffix_ignore_case
24             #
25              
26              
27             package File::Locate::Iterator;
28 9     9   137747 use 5.006; # for qr//, and open anonymous handles
  9         74  
29 9     9   43 use strict;
  9         23  
  9         196  
30 9     9   45 use warnings;
  9         21  
  9         244  
31 9     9   45 use Carp;
  9         15  
  9         737  
32              
33 9     9   53 use DynaLoader;
  9         28  
  9         1863  
34             our @ISA = ('DynaLoader');
35              
36             our $VERSION = 28;
37              
38             # uncomment this to run the ### lines
39             #use Devel::Comments;
40              
41              
42             if (eval { __PACKAGE__->bootstrap($VERSION) }) {
43             ### FLI next() from XS ...
44             } else {
45             ### FLI next() in perl, XS didn't load: $@
46              
47             eval "\n#line ".(__LINE__+1)." \"".__FILE__."\"\n" . <<'HERE' or die;
48             use strict;
49             use warnings;
50             use File::FnMatch;
51              
52             sub _UNEXPECTED_EOF {
53             my ($self) = @_;
54             undef $self->{'entry'};
55             croak 'Invalid database contents (unexpected EOF)';
56             }
57             sub _ERROR_READING {
58             my ($self) = @_;
59             undef $self->{'entry'};
60             croak 'Error reading database: ',$!;
61             }
62             sub _BAD_SHARE {
63             my ($self, $sharelen) = @_;
64             undef $self->{'entry'};
65             croak "Invalid database contents (bad share length $sharelen)";
66             }
67             sub next {
68             my ($self) = @_;
69             ### FLI PP next()
70              
71             my $sharelen = $self->{'sharelen'};
72             my $entry = $self->{'entry'};
73             my $regexp = $self->{'regexp'};
74             my $globs = $self->{'globs'};
75              
76             if (my $mref = $self->{'mref'}) {
77             my $pos = $self->{'pos'};
78             MREF_LOOP: for (;;) {
79             #### pos in map: sprintf('%#x', $pos)
80             if ($pos >= length ($$mref)) {
81             undef $self->{'entry'};
82             return; # end of file
83             }
84              
85             my ($adjshare) = unpack 'c', substr ($$mref, $pos++, 1);
86             if ($adjshare == -128) {
87             #### 2byte pos: sprintf('%#X', $pos)
88             # print ord(substr ($$mref,$pos,1)),"\n";
89             # print ord(substr ($$mref,$pos+1,1)),"\n";
90              
91             if ($pos+2 > length ($$mref)) { goto &_UNEXPECTED_EOF; }
92              
93             # for perl 5.10 up could use 's>' for signed 16-bit big-endian,
94             # instead of getting unsigned and stepping down
95             ($adjshare) = unpack 'n', substr ($$mref, $pos, 2);
96             if ($adjshare >= 32768) { $adjshare -= 65536; }
97              
98             $pos += 2;
99             }
100             ### $adjshare
101             $sharelen += $adjshare;
102             # print "share now $sharelen\n";
103             if ($sharelen < 0 || $sharelen > length($entry)) {
104             push @_, $sharelen; goto &_BAD_SHARE;
105             }
106              
107             my $end = index ($$mref, "\0", $pos);
108             # print "$pos to $end\n";
109             if ($end < 0) { goto &_UNEXPECTED_EOF; }
110              
111             $entry = (substr($entry,0,$sharelen)
112             . substr ($$mref, $pos, $end-$pos));
113             $pos = $end + 1;
114              
115             if ($regexp) {
116             last if $entry =~ $regexp;
117             } elsif (! $globs) {
118             last;
119             }
120             if ($globs) {
121             foreach my $glob (@$globs) {
122             last MREF_LOOP if File::FnMatch::fnmatch($glob,$entry)
123             }
124             }
125             }
126             $self->{'pos'} = $pos;
127              
128             } else {
129             local $/ = "\0"; # readline() to \0
130              
131             my $fh = $self->{'fh'};
132             ### pos tell(fh): sprintf('%#x',tell($fh))
133             IO_LOOP: for (;;) {
134             my $adjshare;
135             unless (my $got = read $fh, $adjshare, 1) {
136             if (defined $got) {
137             undef $self->{'entry'};
138             return; # EOF
139             }
140             goto &_ERROR_READING;
141             }
142              
143             ($adjshare) = unpack 'c', $adjshare;
144             if ($adjshare == -128) {
145             my $got = read $fh, $adjshare, 2;
146             if (! defined $got) { goto &_ERROR_READING; }
147             if ($got != 2) { goto &_UNEXPECTED_EOF; }
148              
149             # for perl 5.10 up could use 's>' for signed 16-bit big-endian
150             # pack, instead of getting unsigned and stepping down
151             ($adjshare) = unpack 'n', $adjshare;
152             if ($adjshare >= 32768) { $adjshare -= 65536; }
153             }
154             ### $adjshare
155              
156             $sharelen += $adjshare;
157             ### share now: $sharelen
158             if ($sharelen < 0 || $sharelen > length($entry)) {
159             push @_, $sharelen; goto &_BAD_SHARE;
160             }
161              
162             my $part;
163             {
164             # perlfunc.pod of 5.10.0 for readline() says you can clear $!
165             # then check it afterwards for an error indication, but that's
166             # wrong, $! ends up set to EBADF when filling the PerlIO buffer,
167             # which means if the readline crosses a 1024 byte boundary
168             # (something in attempting a fast gets then falling back ...)
169              
170             $part = readline $fh;
171             if (! defined $part) { goto &_UNEXPECTED_EOF; }
172              
173             ### part: $part
174             chomp $part or goto &_UNEXPECTED_EOF;
175             }
176              
177             $entry = substr($entry,0,$sharelen) . $part;
178              
179             if ($regexp) {
180             last if $entry =~ $regexp;
181             } elsif (! $globs) {
182             last;
183             }
184             if ($globs) {
185             foreach my $glob (@$globs) {
186             last IO_LOOP if File::FnMatch::fnmatch($glob,$entry)
187             }
188             }
189             }
190             }
191              
192             $self->{'sharelen'} = $sharelen;
193             return ($self->{'entry'} = $entry);
194             }
195              
196             1;
197              
198             HERE
199             }
200              
201 9     9   60 use constant default_use_mmap => 'if_sensible';
  9         18  
  9         9647  
202             my $header = "\0LOCATE02\0";
203              
204              
205             # Default path these days is /var/cache/locate/locatedb.
206             #
207             # Back in findutils 4.1 it was $(localstatedir)/locatedb, but there seems to
208             # have been no way to ask about the location.
209             #
210             sub default_database_file {
211             # my ($class) = @_;
212 0 0   0 1 0 if (defined (my $env = $ENV{'LOCATE_PATH'})) {
213 0         0 return $env;
214             } else {
215 0         0 return '/var/cache/locate/locatedb';
216             }
217             }
218              
219             # The fields, all meant to be private, are:
220             #
221             # regexp
222             # qr// regexp of all the 'regexp', 'regexps', 'suffix' and 'suffixes'
223             # parameters. If no such matches then no such field. When the field
224             # exists an entry must match the regexp or is skipped.
225             #
226             # globs
227             # arrayref of strings which are globs to fnmatch(). If no globs then no
228             # such field. When the field exists an entry must match at least one of
229             # the globs.
230             #
231             # mref
232             # Ref to a scalar which holds the database contents, or undef if using
233             # fh instead. It's either a ref to the 'database_str' parameter passed
234             # in, or a ref to a scalar created as an mmap of the file. The mmap one
235             # is shared among iterators through the File::Locate::Iterator::FileMap
236             # caching.
237             #
238             # fh
239             # When mref is undef, ref file handle which is to be read from,
240             # otherwise no such field. This can be either the 'database_fh'
241             # parameter or an opened anonymous handle of the 'database_file'
242             # parameter.
243             #
244             # When mmap is used the 'database_fh' is not held here. The mmap is
245             # made (or rather, looked up in the FileMap cache), and the handle is
246             # then no longer needed and can be closed or garbage collected in the
247             # caller.
248             #
249             # fh_start
250             # When fh is set, the tell($fh) position just after the $header in that
251             # fh. This is where to seek() back to for a $it->rewind. If tell()
252             # failed then this is -1 and $it->rewind is not possible.
253             #
254             # Normally fh_start is simply length($header) for a database starting at
255             # the start of the file, but a database_fh arg which is positioned at
256             # some offset into a file can be read and remembering an fh_start
257             # position lets $it->rewind work on it too.
258             #
259             # fm
260             # When using mmap, a File::Locate::Iterator::FileMap object which is the
261             # cache entry for the database file, otherwise no such field. This is
262             # hung onto to keep it alive while in use. $self->{'mref'} is
263             # $fm->mmapref in this case.
264             #
265             # pos
266             # When mref is not undef, an integer offset into the $$mref string which
267             # is the current read position. The file header is checked in new() so
268             # the initial value is length($header), ie. 10, the position of the
269             # first entry (or possibly EOF).
270             #
271             # entry
272             # String of the last database entry returned, or no such field before
273             # the first is read, or undef after EOF is hit. Might be undef instead
274             # of not existing if a hypothetical seek() goes back to the start of the
275             # file.
276             #
277             # sharelen
278             # Integer which is the number of leading bytes of 'entry' which the next
279             # entry will share with that previous entry. Initially 0.
280             #
281             # This is modified successively by the "adjshare" of each entry as each
282             # takes more or less of the preceding entry. An adjshare can range from
283             # -sharelen to take nothing at all of the previous entry, up to
284             # length($entry)-sharelen to increment up to take all of the previous
285             # entry.
286             #
287             sub new {
288 76     76 1 56987 my ($class, %options) = @_;
289             ### FLI new(): %options
290              
291             # delete 'regexp' field if it's undef, as the XS code wants no 'regexp'
292             # field for no regexps, not a field set to undef
293 76         442 my @regexps;
294 76 100       281 if (defined (my $regexp = delete $options{'regexp'})) {
295 5         13 push @regexps, $regexp;
296             }
297 76 100       182 if (my $regexps = delete $options{'regexps'}) {
298 2         5 push @regexps, @$regexps;
299             }
300 76 100       178 foreach my $suffix (defined $options{'suffix'} ? $options{'suffix'} : (),
301 76         235 @{$options{'suffixes'}}) {
302 6         20 push @regexps, quotemeta($suffix) . '$';
303             }
304             ### @regexps
305              
306             # as per findutils locate.c locate() function, pattern with * ? or [ is a
307             # glob, anything else is a literal match
308             #
309             my @globs = (defined $options{'glob'} ? $options{'glob'} : (),
310 76 100       168 @{$options{'globs'} || []});
  76 100       351  
311 76         187 @globs = grep { ($_ =~ /[[*?]/
312 11 50       74 || do { push @regexps, quotemeta($_); 0 })
  0         0  
  0         0  
313             } @globs;
314             ### @globs
315              
316 76         221 my $self = bless { entry => '',
317             sharelen => 0,
318             }, $class;
319              
320 76 100       213 if (@regexps) {
321 11         33 my $regexp = join ('|', @regexps);
322 11         204 $self->{'regexp'} = qr/$regexp/s;
323             }
324 76 100       165 if (@globs) {
325 9         22 $self->{'globs'} = \@globs;
326             }
327              
328             ### regexp: $self->{'regexp'}
329             ### globs : $self->{'globs'}
330              
331 76 100       232 if (defined (my $ref = $options{'database_str_ref'})) {
    100          
332 2         4 $self->{'mref'} = $ref;
333              
334             } elsif (defined $options{'database_str'}) {
335 18         46 $self->{'mref'} = \$options{'database_str'};
336              
337             } else {
338             my $use_mmap = (defined $options{'use_mmap'}
339 56 100       165 ? $options{'use_mmap'}
340             : $class->default_use_mmap);
341             ### $use_mmap
342 56 100       118 if ($use_mmap) {
343 33 50       54 if (! eval { require File::Locate::Iterator::FileMap }) {
  33         1945  
344             ### FileMap not possible: $@
345 0         0 $use_mmap = 0;
346             }
347             }
348              
349 56         115 my $fh = $options{'database_fh'};
350 56 100       106 if (defined $fh) {
351 11 50 66     39 if ($use_mmap eq 'if_sensible'
352             && File::Locate::Iterator::FileMap::_have_mmap_layer($fh)) {
353             ### already have mmap layer, not sensible to mmap again
354 0         0 $use_mmap = 0;
355             }
356             } else {
357             my $file = (defined $options{'database_file'}
358 45 50       98 ? $options{'database_file'}
359             : $class->default_database_file);
360             ### open database_file: $file
361              
362             # Crib note: '<:raw' means without :perlio buffering, whereas
363             # binmode() preserves that buffering, assuming it's in the $ENV{'PERLIO'}
364             # defaults. Also :raw is not available in perl 5.6.
365 45 50       1658 open $fh, '<', $file
366             or croak "Cannot open $file: $!";
367 45 50       256 binmode($fh)
368             or croak "Cannot set binary mode";
369             }
370              
371 56 100       131 if ($use_mmap eq 'if_sensible') {
372 8 50       27 $use_mmap = (File::Locate::Iterator::FileMap::_mmap_size_excessive($fh)
373             ? 0
374             : 'if_possible');
375             ### if_sensible after size check becomes: $use_mmap
376             }
377              
378 56 100       116 if ($use_mmap) {
379             ### attempt mmap: $fh, (-s $fh)
380              
381             # There's many ways an mmap can fail, just chuck an eval on FileMap /
382             # File::Map it to catch them all.
383             # - An ordinary readable file of length zero may fail per POSIX, and
384             # that's how it is in linux kernel post 2.6.12. However File::Map
385             # 0.20 takes care of returning an empty string for that.
386             # - A char special usually gives 0 for its length, even for instance
387             # linux kernel special files like /proc/meminfo. Char specials can
388             # often be mapped perfectly well, but without a length don't know
389             # how much to look at. For that reason "if_possible" restricts to
390             # ordinary files, though forced "use_mmap=>1" just goes ahead anyway.
391             #
392 33 100       67 if ($use_mmap eq 'if_possible') {
393 32 100       310 if (! -f $fh) {
394             ### if_possible, not a plain file, consider not mmappable
395             } else {
396 31 100       72 if (! eval { $self->{'fm'}
  31         186  
397             = File::Locate::Iterator::FileMap->get($fh) }) {
398             ### mmap failed: $@
399             }
400             }
401             } else {
402 1         6 $self->{'fm'} = File::Locate::Iterator::FileMap->get($fh);
403             }
404             }
405 55 100       522 if ($self->{'fm'}) {
406 29         95 $self->{'mref'} = $self->{'fm'}->mmap_ref;
407             } else {
408 26         57 $self->{'fh'} = $fh;
409             }
410             }
411              
412 75 100       202 if (my $mref = $self->{'mref'}) {
413 49 100       441 unless ($$mref =~ /^\Q$header/o) { goto &_ERROR_BAD_HEADER }
  3         17  
414 46         519 $self->{'pos'} = length($header);
415             } else {
416 26         50 my $got = '';
417 26         313 read $self->{'fh'}, $got, length($header);
418 26 100       137 if ($got ne $header) { goto &_ERROR_BAD_HEADER }
  3         53  
419 23         65 $self->{'fh_start'} = tell $self->{'fh'};
420             }
421              
422 69         298 return $self;
423             }
424             sub _ERROR_BAD_HEADER {
425 6     6   818 croak 'Invalid database contents (no LOCATE02 header)';
426             }
427              
428             sub rewind {
429 12     12 1 13477 my ($self) = @_;
430              
431 12         25 $self->{'sharelen'} = 0;
432 12         22 $self->{'entry'} = '';
433 12 100       28 if ($self->{'mref'}) {
434 5         15 $self->{'pos'} = length($header);
435             } else {
436 7 50       23 $self->{'fh_start'} > 0
437             or croak "Cannot seek database";
438 7 50       93 seek ($self->{'fh'}, $self->{'fh_start'}, 0)
439             or croak "Cannot seek database: $!";
440             }
441             }
442              
443             # return true if mmap is in use
444             # (an actual mmap, not the slightly similar 'database_str' option)
445             # this is meant for internal use as a diagnostic ...
446             sub _using_mmap {
447 12     12   53 my ($self) = @_;
448 12         36 return defined $self->{'fm'};
449             }
450              
451             # Not yet documented, likely worthwhile as long as it works properly.
452             # Return empty list for nothing yet? Same as next().
453             # Return empty list at EOF? At EOF 'entry' is undefed out.
454             #
455             # =item C<< $entry = $it->current >>
456             #
457             # Return the current entry from the database, meaning the same as the last
458             # call to C returned. At the start of the database (before the first
459             # C) or at end of the database the return is an empty list.
460             #
461             # while (defined $it->next) {
462             # ...
463             # print $it->current,"\n";
464             # }
465             #
466             sub _current {
467 0     0     my ($self) = @_;
468 0 0         if (defined $self->{'entry'}) {
469 0           return $self->{'entry'};
470             } else {
471 0           return;
472             }
473             }
474              
475              
476             1;
477             __END__