File Coverage

blib/lib/Math/OEIS/Grep.pm
Criterion Covered Total %
statement 41 273 15.0
branch 7 132 5.3
condition 1 56 1.7
subroutine 11 17 64.7
pod 1 2 50.0
total 61 480 12.7


line stmt bran cond sub pod time code
1             # Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Kevin Ryde
2              
3             # This file is part of Math-OEIS.
4             #
5             # Math-OEIS is free software; you can redistribute it and/or modify it
6             # under the terms of the GNU General Public License as published by the Free
7             # Software Foundation; either version 3, or (at your option) any later
8             # version.
9             #
10             # Math-OEIS is distributed in the hope that it will be useful, but
11             # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12             # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13             # for more details.
14             #
15             # You should have received a copy of the GNU General Public License along
16             # with Math-OEIS. If not, see .
17              
18             package Math::OEIS::Grep;
19 1     1   84289 use 5.006;
  1         3  
20 1     1   5 use strict;
  1         1  
  1         17  
21 1     1   4 use warnings;
  1         1  
  1         38  
22 1     1   6 use Carp 'croak';
  1         1  
  1         47  
23 1     1   489 use Math::OEIS::Names;
  1         2  
  1         27  
24 1     1   373 use Math::OEIS::Stripped;
  1         2  
  1         1702  
25              
26             our $VERSION = 13;
27              
28             # uncomment this to run the ### lines
29             # use Smart::Comments;
30              
31              
32             my $stripped_mmap;
33              
34             sub import {
35 1     1   7 my $class = shift;
36 1         1 my $arg = shift;
37 1 50 33     1573 if ($arg && $arg eq '-search') {
38             ### Grep import() -search
39              
40             # Encode::Locale output coding if available, otherwise utf8 since that
41             # is the coding of the names file so we become a pass-through.
42 0         0 eval {
43 0         0 require Encode;
44 0         0 require PerlIO::encoding;
45 0         0 my $coding = 'utf8';
46 0         0 eval { require Encode::Locale; $coding = 'console_out'; };
  0         0  
  0         0  
47 0         0 $coding = ":encoding($coding)";
48             {
49 0         0 local $PerlIO::encoding::fallback = Encode::PERLQQ();
  0         0  
50 0         0 binmode(STDOUT, $coding);
51             # Not sure coding on STDERR is a good idea, could loop trying to print.
52             # binmode(STDERR, $coding);
53             }
54             ### $coding
55             };
56 0         0 $class->search(array=>\@_);
57 0         0 exit 0;
58             }
59             }
60              
61             sub search {
62             ### Grep search() ...
63 0     0 1 0 my $class = shift;
64 0         0 my %h = (try_abs => 1,
65             verbose => 0,
66             use_mmap => 'if_possible',
67             max_matches => 10,
68             @_);
69             ### $class
70              
71 0   0     0 my $verbose = $h{'verbose'} || 0;
72              
73 0         0 my $values_min = $h{'values_min'};
74 0         0 my $values_max = $h{'values_max'};
75             ### $values_min
76             ### $values_max
77              
78 0         0 my $name = $h{'name'};
79 0 0       0 if (defined $name) {
80 0         0 $name = "$name: ";
81             } else {
82 0         0 $name = '';
83             }
84             ### $name
85              
86 0         0 my %exclude;
87 0 0       0 if (my $aref = $h{'exclude_list'}) {
88 0         0 @exclude{@$aref} = (); # hash slice
89             }
90             ### %exclude
91              
92 0         0 my $array = $h{'array'};
93 0 0       0 if (! $array) {
94 0         0 my $string = $h{'string'};
95 0         0 $string =~ s/\s+/,/;
96 0         0 $array = [ grep {defined} split /,+/, $string ];
  0         0  
97             }
98 0 0       0 unless ($array) {
99 0         0 croak 'search() missing array=>[] parameter';
100             }
101 0 0       0 if (@$array == 0) {
102             ### empty ...
103 0         0 print "${name}no match empty list of values\n\n";
104 0         0 return;
105             }
106              
107 0         0 my $use_mmap = $h{'use_mmap'};
108              
109 0 0 0     0 if ($use_mmap && ! defined $stripped_mmap) {
110 0         0 my $stripped_obj = Math::OEIS::Stripped->instance;
111 0         0 my $stripped_filename = $stripped_obj->filename;
112 0 0       0 if (eval {
113 0         0 require File::Map;
114 0         0 File::Map::map_file ($stripped_mmap, $stripped_filename);
115 0         0 1;
116             }) {
117 0 0       0 if ($verbose) {
118 0         0 print "mmap stripped file, length ",length($stripped_mmap),"\n";
119             }
120             } else {
121 0         0 my $err = $@;
122 0 0       0 if ($use_mmap eq 'if_possible') {
123 0 0       0 if ($verbose >= 2) {
124 0         0 print "cannot mmap, fallback to open: $err\n";
125             }
126 0         0 $use_mmap = 0;
127             } else {
128 0         0 croak "Cannot mmap $stripped_filename: $err";
129             }
130             }
131             }
132              
133 0         0 my $fh;
134 0 0       0 if (! $use_mmap) {
135             # ENHANCE-ME: show the reason ENOENT etc here
136 0   0     0 $fh = Math::OEIS::Stripped->fh
137             || croak "Cannot open ~/OEIS/stripped file";
138             }
139              
140             {
141 0         0 my $str = $array->[0];
  0         0  
142 0         0 for (my $i = 1; $i <= $#$array; $i++) {
143 0         0 $str .= ','.$array->[$i];
144 0 0 0     0 if (length($str) > 50 && $i != $#$array) {
145 0         0 $str .= ',...';
146 0         0 last;
147             }
148             }
149 0         0 $name .= "match $str\n";
150             }
151              
152 0 0       0 if (defined (my $value = _constant_array(@$array))) {
153 0 0 0     0 if ($value != 0 && abs($value) <= 1000) {
154 0         0 print "${name}constant $value\n\n";
155 0         0 return;
156             }
157             }
158              
159 0 0       0 if (defined (my $diff = _constant_diff(@$array))) {
160 0 0 0     0 if (abs($diff) < 20 && abs($array->[0]) < 100) {
161 0         0 print "${name}constant difference $diff\n\n";
162 0         0 return;
163             }
164             }
165              
166 0 0       0 if ($verbose) {
167 0         0 print $name;
168 0         0 $name = '';
169             }
170             ### $use_mmap
171              
172 0         0 my $max_matches = $h{'max_matches'};
173 0         0 my $count = 0;
174              
175 0         0 my $orig_array = $array;
176 0         0 my $mung_desc = '';
177 0 0       0 MUNG: foreach my $mung ('none',
    0          
178             ($h{'_EXPERIMENTAL_exact'}
179             ? ()
180             : ('trim',
181             'negate',
182             ($h{'try_abs'} ? 'abs' : ()),
183             'half',
184             'quarter',
185             'double')),
186             ){
187             ### $mung
188 0 0       0 last if $count; # no more munging when found a match
189              
190 0 0       0 if ($mung eq 'none') {
    0          
    0          
    0          
    0          
    0          
    0          
191              
192             } elsif ($mung eq 'trim') {
193 0         0 my $count = 0;
194 0         0 $array = [ @$orig_array ]; # copy
195 0   0     0 while (@$array && $array->[0] == 0) { # leading zeros
196 0         0 shift @$array;
197 0         0 $count++;
198             }
199 0 0       0 if (@$array) { # plus one more
200 0         0 shift @$array;
201 0         0 $count++;
202             }
203             ### trimmed to: join(',',@$array)
204 0         0 $mung_desc = "[TRIMMED START $count VALUES]\n";
205 0 0 0     0 if (_aref_any_nonzero($array) &&
      0        
206             (@$array >= 3 || length(join(',',@$array)) >= 5)) {
207 0         0 $orig_array = $array;
208             } else {
209             ### too few values to trim ...
210 0         0 next MUNG;
211             }
212              
213             } elsif ($mung eq 'negate') {
214 0         0 $mung_desc = "[NEGATED]\n";
215 0         0 $array = [ map { my $value = $_;
  0         0  
216 0 0 0     0 unless ($value eq '0' || $value =~ s/^-//) {
217 0         0 $value = "-$value";
218             }
219             $value
220 0         0 } @$orig_array ];
221              
222             } elsif ($mung eq 'half') {
223 0         0 $mung_desc = "[HALF]\n";
224             $array = [ map {
225 0         0 my $value = _to_bigint($_);
  0         0  
226 0 0       0 if ($value % 2) {
227 0 0       0 if ($verbose) {
228 0         0 print "not all even, skip halving\n";
229             }
230 0         0 next MUNG;
231             }
232 0         0 $value/2
233             } @$orig_array ];
234              
235             } elsif ($mung eq 'quarter') {
236 0         0 $mung_desc = "[QUARTER]\n";
237             $array = [ map {
238 0         0 my $value = _to_bigint($_);
  0         0  
239 0 0       0 if ($value % 4) {
240 0 0       0 if ($verbose) {
241 0         0 print "not all multiple of 4, skip quartering\n";
242             }
243 0         0 next MUNG;
244             }
245 0         0 $value/4
246             } @$orig_array ];
247              
248             } elsif ($mung eq 'double') {
249 0         0 $mung_desc = "[DOUBLE]\n";
250 0         0 $array = [ map {2*_to_bigint($_)} @$orig_array ];
  0         0  
251              
252             } elsif ($mung eq 'abs') {
253 0         0 $mung_desc = "[ABSOLUTE VALUES]\n";
254 0         0 my $any_negative = 0;
255 0         0 $array = [ map { my $abs = $_;
  0         0  
256 0         0 $any_negative |= ($abs =~ s/^-//);
257 0         0 $abs
258             } @$orig_array ];
259 0 0       0 if (! $any_negative) {
260 0 0       0 if ($verbose) {
261 0         0 print "no negatives, skip absolutize\n";
262             }
263 0         0 next;
264             }
265 0 0       0 if (_constant_array(@$array)) {
266 0 0       0 if ($verbose) {
267 0         0 print "abs values all equal, skip absolutize\n";
268             }
269 0         0 next;
270             }
271             }
272              
273 0         0 my $re = $class->array_to_regexp($array);
274 0 0       0 if ($h{'_EXPERIMENTAL_exact'}) {
275 0         0 $re = ' '.$re;
276             }
277              
278 0 0       0 if ($use_mmap) {
279 0         0 pos($stripped_mmap) = 0;
280             ### mmap total length: length($stripped_mmap)
281             } else {
282 0 0       0 seek $fh, 0, 0
283             or croak "Error seeking stripped file: ",$!;
284             }
285 0         0 my $block = '';
286 0         0 my $extra = '';
287 0         0 SEARCH: for (;;) {
288 0         0 my $line;
289 0 0       0 if ($use_mmap) {
290              
291             # using regexp only
292 0 0       0 $stripped_mmap =~ /$re/g or last SEARCH;
293 0         0 my $found_pos = pos($stripped_mmap);
294              
295             # my $found_whole = $&;
296             # ### $found_whole
297              
298             # $re matches , or \n at end
299             # $found_pos may be after \n of matched line
300             # So for $end look from $found_pos-1 onwards.
301             # For $start look from $found_pos-2 since don't want rindex() to
302             # give the \n which is at $found_pos-1.
303 0         0 my $start = rindex($stripped_mmap,"\n",$found_pos-2) + 1;
304 0         0 my $end = index($stripped_mmap,"\n",$found_pos-1);
305 0         0 pos($stripped_mmap) = $end;
306 0         0 $line = substr($stripped_mmap, $start, $end-$start);
307              
308             ### $found_pos
309             ### char at found_pos: substr($stripped_mmap,$found_pos,1)
310             ### $start
311             ### $end
312             ### found_pos from line end: $end-$found_pos
313             ### $line
314             ### assert: $end >= $start
315              
316             # my $pos = 0;
317             # using combination index() and regexp
318             # for (;;) {
319             # $stripped_mmap =~ /$re/g or last SEARCH;
320             # my $found_pos = pos($stripped_mmap)-1;
321             # # my $found_pos = index($stripped_mmap,$fixed,$pos);
322             # # if ($found_pos < 0) { last SEARCH; }
323             #
324             # my $start = rindex($stripped_mmap,"\n",$found_pos) + 1;
325             # my $end = index($stripped_mmap,"\n",$found_pos);
326             # $pos = $end;
327             # $line = substr($stripped_mmap, $start, $end-$start);
328             # last if $line =~ $re;
329             # }
330              
331             } else {
332             ### block reads ...
333              
334 0         0 for (;;) {
335 0 0       0 if ($block =~ /$re/g) {
336             # same $found_pos logic as the mmap case above
337 0         0 my $found_pos = pos($block);
338 0         0 my $start = rindex($block,"\n",$found_pos-2) + 1;
339 0         0 my $end = index($block,"\n",$found_pos-1);
340 0         0 pos($block) = $end;
341 0         0 $line = substr($block, $start, $end-$start);
342 0         0 last;
343             }
344 0         0 $block = _read_block_lines($fh, $extra);
345 0 0       0 defined $block or last SEARCH;
346              
347             # or line by line
348             # $line = readline $fh;
349             # defined $line or last SEARCH;
350             }
351             }
352              
353 0 0       0 my ($anum,$found_values_str)
354             = Math::OEIS::Stripped->line_split_anum($line)
355             or die "oops, A-number not matched in line: ",$line;
356             ### $anum
357              
358 0 0       0 if (exists $exclude{$anum}) {
359             ### exclude ...
360 0         0 next;
361             }
362              
363             # enforce values_min, values_max on the found sequence
364 0 0 0     0 if (defined $values_min || defined $values_max) {
365 0         0 my @found_values =Math::OEIS::Stripped->values_split($found_values_str);
366 0 0 0     0 if ((defined $values_min && grep {$_ < $values_min} @found_values)
  0   0     0  
      0        
367 0         0 || (defined $values_max && grep {$_ > $values_max} @found_values)) {
368             ### skip due to found values out of range ...
369 0         0 next;
370             }
371             }
372              
373 0 0 0     0 if (defined $max_matches && $count >= $max_matches) {
374 0         0 print "... and more matches\n";
375 0         0 last SEARCH;
376             }
377              
378 0         0 print $name;
379 0         0 $name = '';
380              
381 0         0 print $mung_desc;
382 0         0 $mung_desc = '';
383              
384 0         0 my $anum_name = Math::OEIS::Names->anum_to_name($anum);
385 0 0       0 if (! defined $anum_name) { $anum_name = '[unknown name]'; }
  0         0  
386 0         0 print "$anum $anum_name\n";
387              
388 0         0 print "$line\n";
389 0         0 $count++;
390             }
391             }
392 0 0       0 if ($count == 0) {
393 0 0       0 if ($verbose) {
394 0         0 print "no matches\n";
395             }
396             }
397 0 0 0     0 if ($count || $verbose) {
398 0         0 print "\n";
399             }
400             }
401              
402             # Read a block of multiple lines from $fh.
403             # The return is a string $block, or undef at EOF.
404             # $extra in $_[1] is used to hold a partial line.
405             sub _read_block_lines {
406 4     4   3414 my ($fh, $extra) = @_;
407 4         7 my $block = $extra;
408 4         5 for (;;) {
409 7         162 my $len = read($fh, $block, 65536,
410             length($block)); # append to $block
411 7 50       18 if (! defined $len) {
412 0         0 croak "Error seeking stripped file: ",$!;
413             }
414 7 100       16 if (! $len) {
415             # EOF
416 4         6 $_[1] = '';
417 4 100       9 if (length ($block)) {
418 2         130 return $block;
419             } else {
420 2         5 return undef;
421             }
422             }
423 3         224 my $end = rindex $block, "\n";
424 3 50       8 if ($end >= 0) {
425             # keep partial line in $extra
426 0           $_[1] = substr ($block, $end); # partial line
427 0           substr($block, $end, length($block)-$end, ''); # truncate block
428 0           return $block;
429             }
430             # no end of line in $block, keep reading to find one
431             }
432             }
433              
434 1     1   8 use constant _MIN_MATCH_COUNT => 15;
  1         2  
  1         46  
435 1     1   5 use constant _MIN_MATCH_CHARS => 40;
  1         2  
  1         46  
436 1     1   5 use constant _MAX_REGEXP_LENGTH => 400;
  1         2  
  1         371  
437              
438             # Return a regexp (a string) which matches the numbers in $array.
439             # $str =~ s/^\s+//;
440             # $str =~ s/\s+$//;
441             # split /\s*,\s*/, $str
442             sub array_to_regexp {
443 0     0 0   my ($self, $array) = @_;
444             ### array_to_regexp(): join(',',@$array)
445 0           my $re = ',';
446 0           my $close = 0;
447 0           foreach my $i (0 .. $#$array) {
448 0           my $value = $array->[$i];
449 0 0         if (length($re) > _MAX_REGEXP_LENGTH) { # don't make a huge regexp
450 0           last;
451             }
452              
453             # Mandatory match of numbers or chars, whichever comes first,
454             # after that OEIS can end.
455             # Most OEIS samples are nice and long, with even hard ones going to
456             # limits of reasonable computing, but some are shorter. For example
457             # A109680 circa July 2016 had only 19 values 46 chars.
458             # ENHANCE-ME: take a parameter for these minimums.
459             #
460 0 0 0       if ($i >= _MIN_MATCH_COUNT || length($re) > _MIN_MATCH_CHARS) {
461 0           $re .= '(?:[\r\n]|';
462 0           $close++;
463             }
464 0           $re .= $value . ',';
465             }
466 0           $re .= ')' x $close;
467             # $re .= "[,\r\n]";
468             ### $re
469 0           return $re;
470             }
471              
472             # constant_diff($a,$b,$c,...)
473             # If all the given values have a constant difference then return that amount.
474             # Otherwise return undef.
475             #
476             sub _constant_diff {
477 0     0     my $diff = shift;
478 0 0         unless (@_) {
479 0           return undef;
480             }
481 0           my $value = shift;
482 0           $diff = $value - $diff;
483 0           while (@_) {
484 0           my $next_value = shift;
485 0 0         if ($next_value - $value != $diff) {
486 0           return undef;
487             }
488 0           $value = $next_value;
489             }
490 0           return $diff;
491             }
492              
493             # _constant_array($a,$b,$c,...)
494             # If all the given values are all equal then return that value.
495             # Otherwise return undef.
496             #
497             sub _constant_array {
498 0     0     my $value = shift;
499 0           while (@_) {
500 0           my $next_value = shift;
501 0 0         if ($next_value != $value) {
502 0           return undef;
503             }
504             }
505 0           return $value;
506             }
507              
508             # return true if the array in $aref has any non-zero entries
509             sub _aref_any_nonzero {
510 0     0     my ($aref) = @_;
511 0           foreach my $value (@$aref) {
512 0 0         if ($value) { return 1; }
  0            
513             }
514 0           return 0;
515             }
516              
517             {
518             my $bigint_class;
519             my $length_limit = length(~0) - 2;
520             sub _to_bigint {
521 0     0     my ($n) = @_;
522 0 0         if (length($n) < $length_limit) {
523 0           return $n;
524             }
525 0   0       $bigint_class ||= do {
526             # Crib note: don't change the back-end if already loaded
527 0           require Math::BigInt;
528 0           'Math::BigInt'
529             };
530             # stringize as a workaround for a bug where Math::BigInt::GMP
531             # incorrectly converts UV numbers bigger than IV
532 0           return $bigint_class->new("$n");
533             }
534             }
535              
536             1;
537             __END__