File Coverage

blib/lib/Math/NumSeq/OEIS/File.pm
Criterion Covered Total %
statement 93 389 23.9
branch 21 184 11.4
condition 9 85 10.5
subroutine 21 40 52.5
pod 8 10 80.0
total 152 708 21.4


)? # empty or
line stmt bran cond sub pod time code
1             # Copyright 2011, 2012, 2013, 2014 Kevin Ryde
2              
3             # This file is part of Math-NumSeq.
4             #
5             # Math-NumSeq is free software; you can redistribute it and/or modify
6             # it under the terms of the GNU General Public License as published by the
7             # Free Software Foundation; either version 3, or (at your option) any later
8             # version.
9             #
10             # Math-NumSeq is distributed in the hope that it will be useful, but
11             # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12             # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13             # for more details.
14             #
15             # You should have received a copy of the GNU General Public License along
16             # with Math-NumSeq. If not, see .
17              
18              
19             # http://oeis.org/wiki/Clear-cut_examples_of_keywords
20             #
21             # ENHANCE-ME: share most of the a-file/b-file reading with Math::NumSeq::File
22              
23             package Math::NumSeq::OEIS::File;
24 2     2   13905 use 5.004;
  2         8  
  2         109  
25 2     2   9 use strict;
  2         5  
  2         57  
26 2     2   9 use Carp;
  2         4  
  2         211  
27 2     2   1999 use POSIX ();
  2         15846  
  2         56  
28 2     2   18 use File::Spec;
  2         4  
  2         50  
29 2     2   4601 use Symbol 'gensym';
  2         5159  
  2         402  
30              
31 2     2   16 use vars '$VERSION','@ISA';
  2         5  
  2         149  
32             $VERSION = 71;
33              
34 2     2   3683 use Math::NumSeq;
  2         5  
  2         98  
35             @ISA = ('Math::NumSeq');
36             *_to_bigint = \&Math::NumSeq::_to_bigint;
37              
38 2     2   12 use vars '$VERSION';
  2         4  
  2         135  
39             $VERSION = 71;
40              
41 2     2   17 eval q{use Scalar::Util 'weaken'; 1}
  2         3  
  2         224  
42             || eval q{sub weaken { $_[0] = undef }; 1 }
43             || die "Oops, error making a weaken() fallback: $@";
44              
45             # uncomment this to run the ### lines
46             # use Smart::Comments;
47              
48              
49             # use constant name => Math::NumSeq::__('OEIS File');
50 2     2   9269 use Math::NumSeq::OEIS;
  2         8  
  2         139  
51             *parameter_info_array = \&Math::NumSeq::OEIS::parameter_info_array;
52              
53             use constant::defer _HAVE_ENCODE => sub {
54 0 0       0 eval { require Encode; 1 } || 0;
  0         0  
  0         0  
55 2     2   11 };
  2         3  
  2         25  
56              
57             sub description {
58 0     0 1 0 my ($class_or_self) = @_;
59 0 0 0     0 if (ref $class_or_self && defined $class_or_self->{'description'}) {
60             # instance
61 0         0 return $class_or_self->{'description'};
62             } else {
63             # class
64 0         0 return Math::NumSeq::__('OEIS sequence from file.');
65             }
66             }
67              
68             sub values_min {
69 0     0 1 0 my ($self) = @_;
70             ### OEIS-File values_min() ...
71 0         0 return _analyze($self)->{'values_min'};
72             }
73             sub values_max {
74 0     0 1 0 my ($self) = @_;
75             ### OEIS-File values_max() ...
76 0         0 return _analyze($self)->{'values_max'};
77             }
78              
79             my %analyze_characteristics = (increasing => 1,
80             increasing_from_i => 1,
81             non_decreasing => 1,
82             non_decreasing_from_i => 1,
83             smaller => 1,
84             );
85             sub characteristic {
86 0     0 1 0 my ($self, $key) = @_;
87 0 0       0 if ($analyze_characteristics{$key}) {
88 0         0 _analyze($self);
89             }
90 0         0 return shift->SUPER::characteristic(@_);
91             }
92              
93             sub oeis_dir {
94 247     247 0 21799 require File::HomeDir;
95 247         14928 return File::Spec->catfile (File::HomeDir->my_home, 'OEIS');
96             }
97             sub anum_to_bfile {
98 0     0 0 0 my ($anum, $prefix) = @_;
99 0   0     0 $prefix ||= 'b';
100 0         0 $anum =~ s/^A/$prefix/;
101 0         0 return "$anum.txt";
102             }
103              
104             #------------------------------------------------------------------------------
105             # Keep track of all instances which exist and on an ithread CLONE re-open
106             # any filehandles in the instances, so they have their own independent file
107             # positions in the new thread.
108              
109             my %instances;
110             sub DESTROY {
111 65     65   169 my ($self) = @_;
112 65         592 delete $instances{$self+0};
113             }
114             sub CLONE {
115 0     0   0 my ($class) = @_;
116 0         0 foreach my $self (values %instances) {
117 0 0       0 next unless $self;
118 0 0       0 next unless $self->{'fh'};
119 0         0 my $pos = _tell($self);
120 0         0 my $fh = gensym;
121 0 0       0 if (open $fh, "< $self->{'filename'}") {
122 0         0 $self->{'fh'} = $fh;
123 0         0 _seek ($self, $pos);
124             } else {
125 0         0 delete $self->{'fh'};
126 0         0 delete $self->{'filename'};
127             }
128             }
129             }
130              
131             #------------------------------------------------------------------------------
132              
133             # The length in decimal digits of the biggest value which fits in a plain
134             # Perl integer. For example on a 32-bit system this is 9 since 9 digit
135             # numbers such as "999_999_999" are the biggest which fit a signed IV
136             # (+2^31).
137             #
138             # The IV size is probed rather than using ~0 since under "perl -Minteger"
139             # have ~0 as -1 rather than the biggest UV ... except "use integer" is not
140             # normally global.
141             #
142             # The NV size is applied to the limit too since not sure should trust values
143             # to stay in IV or UV. This means on a 64-bit integer with 53-bit NV
144             # "double" the limit is 53-bits.
145             #
146 2         9 use constant 1.02 _MAX_DIGIT_LENGTH => do {
147             ### ~0 is: ~0
148              
149 2         29 my $iv = 0;
150 2         9 for (1 .. 256) {
151 130         137 my $new = ($iv << 1) | 1;
152 130 100 66     424 unless ($new > $iv && ($new & 1) == 1) {
153 2         7 last;
154             }
155 128         152 $iv = $new;
156             }
157             ### $iv
158              
159 2         10 require POSIX;
160 2         47 my $nv = POSIX::FLT_RADIX() ** (POSIX::DBL_MANT_DIG()-5);
161             ### $nv
162              
163 2         7 my $iv_len = length($iv) - 1;
164 2         16 my $nv_len = length($nv) - 1;
165 2 50       10529 ($iv_len < $nv_len ? $iv_len : $nv_len) # smaller of the two lengths;
166 2     2   1676 };
  2         44  
167             ### _MAX_DIGIT_LENGTH: _MAX_DIGIT_LENGTH()
168              
169              
170             #------------------------------------------------------------------------------
171              
172             # special case a000000.txt files to exclude
173             #
174             my %afile_exclude
175             = (
176             # a003849.txt has replication level words rather than the individual
177             # sequence values.
178             'a003849.txt' => 1,
179              
180             # a027750.txt is unflattened divisors as lists.
181             # Its first line is a correct looking "1 1" so _afile_is_good() doesn't
182             # notice.
183             'a027750.txt' => 1,
184             );
185              
186              
187             # Fields:
188             # fh File handle ref, if reading B-file or A-file
189             #
190             # next_seek File pos to seek $fh for next() to read from.
191             # ith() sets this when it moves the file position.
192             #
193             # array Arrayref of values if using .internal or .html.
194             # array_pos Index 0,1,2,... of next value of $array to return by next().
195             #
196             # i Next $i for next() to return.
197             # When reading a file this is ignored, use the file i instead.
198              
199             sub new {
200             ### OEIS-File new() ...
201 65     65 1 33396 my $self = shift->SUPER::new(@_);
202              
203 65         251 delete $self->{'next_seek'}; # no initial seek
204 65         266 $self->{'characteristic'}->{'integer'} = 1;
205              
206 65         138 my $anum = $self->{'anum'};
207 65         516 (my $num = $anum) =~ s/^A//;
208 65         331 foreach my $basefile ("a$num.txt",
209             "b$num.txt") {
210 130 100       405 next if $afile_exclude{$basefile};
211              
212 120 100 100     801 next if $self->{'_dont_use_afile'} && $basefile =~ /^a/;
213 76 100 66     462 next if $self->{'_dont_use_bfile'} && $basefile =~ /^b/;
214              
215 37         123 my $filename = File::Spec->catfile (oeis_dir(), $basefile);
216             ### $filename
217 37         3793 my $fh = gensym();
218 37 50       1815 if (! open $fh, "< $filename") {
219             ### cannot open: $!
220 37         213 next;
221             }
222              
223 0         0 $self->{'filename'} = $filename; # the B-file or A-file name
224 0         0 $self->{'fh'} = $fh;
225 0 0       0 if (! _afile_is_good($self)) {
226             ### this afile not good ...
227 0         0 close delete $self->{'fh'};
228 0         0 delete $self->{'filename'};
229 0         0 next;
230             }
231 0         0 $self->{'fh_i'} = $self->i_start; # at first entry
232              
233             ### opened: $fh
234 0         0 last;
235             }
236              
237 65   33     331 my $have_info = (_read_internal_txt($self, $anum)
238             || _read_internal_html($self, $anum)
239             || _read_html($self, $anum));
240              
241 65 50 33     481 if (! $have_info && ! $self->{'fh'}) {
242 65         19668 croak 'OEIS file(s) not found for A-number "',$anum,'"';
243             }
244              
245 0         0 weaken($instances{$self+0} = $self);
246 0         0 return $self;
247             }
248              
249             sub _analyze {
250 0     0   0 my ($self) = @_;
251              
252 0 0       0 if ($self->{'analyze_done'}) {
253 0         0 return $self;
254             }
255 0         0 $self->{'analyze_done'} = 1;
256              
257             ### _analyze() ...
258              
259 0         0 my $i_start = $self->i_start;
260 0         0 my ($i, $value);
261 0         0 my ($prev_i, $prev_value);
262              
263 0         0 my $values_min;
264 0         0 my $values_max;
265 0         0 my $increasing_from_i = $i_start;
266 0         0 my $non_decreasing_from_i = $i_start;
267 0         0 my $strictly_smaller_count = 0;
268 0         0 my $smaller_count = 0;
269 0         0 my $total_count = 0;
270              
271             my $analyze = sub {
272             ### $prev_value
273             ### $value
274 0 0 0 0   0 if (! defined $values_min || $value < $values_min) {
275 0         0 $values_min = $value;
276             }
277 0 0 0     0 if (! defined $values_max || $value > $values_max) {
278 0         0 $values_max = $value;
279             }
280              
281 0 0       0 if (defined $prev_value) {
282 0         0 my $cmp = ($value <=> $prev_value);
283 0 0       0 if ($cmp < 0) {
284             # value < $prev_value
285 0         0 $increasing_from_i = $i;
286 0         0 $non_decreasing_from_i = $i;
287             }
288 0 0       0 if ($cmp <= 0) {
289             # value <= $prev_value
290 0         0 $increasing_from_i = $i;
291             }
292             }
293              
294 0         0 $total_count++;
295 0         0 $smaller_count += (abs($value) <= $i);
296 0         0 $strictly_smaller_count += ($value < $i);
297              
298 0         0 $prev_i = $value;
299 0         0 $prev_value = $value;
300 0         0 };
301              
302 0 0       0 if (my $fh = $self->{'fh'}) {
303 0         0 my $oldpos = _tell($self);
304 0         0 while (($i, $value) = _readline($self)) {
305 0         0 $analyze->($value);
306 0 0       0 last if $total_count > 200;
307             }
308 0         0 _seek ($self, $oldpos);
309             } else {
310 0         0 $i = $i_start;
311 0         0 foreach (@{$self->{'array'}}) {
  0         0  
312 0         0 $i++;
313 0         0 $value = $_;
314 0         0 $analyze->();
315             }
316             }
317              
318 0   0     0 my $range_is_small = (defined $values_max
319             && $values_max - $values_min <= 16);
320             ### $range_is_small
321              
322             # "full" means whole sequence in sample values
323             # "sign" means negatives in sequence
324 0 0 0     0 if (! defined $self->{'values_min'}
      0        
325             && ($range_is_small
326             || $self->{'characteristic'}->{'OEIS_full'}
327             || ! $self->{'characteristic'}->{'OEIS_sign'})) {
328             ### set values_min: $values_min
329 0         0 $self->{'values_min'} = $values_min;
330             }
331 0 0 0     0 if (! defined $self->{'values_max'}
      0        
332             && ($range_is_small
333             || $self->{'characteristic'}->{'OEIS_full'})) {
334             ### set values_max: $values_max
335 0         0 $self->{'values_max'} = $values_max;
336             }
337              
338 0   0     0 $self->{'characteristic'}->{'smaller'}
339             = ($total_count == 0
340             || ($smaller_count / $total_count >= .9
341             && $strictly_smaller_count > 0));
342             ### decide smaller: $self->{'characteristic'}->{'smaller'}
343              
344             ### $increasing_from_i
345 0 0 0     0 if (defined $prev_i && $increasing_from_i < $prev_i) {
346 0 0       0 if ($increasing_from_i - $i_start < 20) {
347 0         0 $self->{'characteristic'}->{'increasing_from_i'} = $increasing_from_i;
348             }
349 0 0       0 if ($increasing_from_i == $i_start) {
350 0         0 $self->{'characteristic'}->{'increasing'} = 1;
351             }
352             }
353              
354             ### $non_decreasing_from_i
355 0 0 0     0 if (defined $prev_i && $non_decreasing_from_i < $prev_i) {
356 0 0       0 if ($non_decreasing_from_i - $i_start < 20) {
357 0         0 $self->{'characteristic'}->{'non_decreasing_from_i'} = $non_decreasing_from_i;
358             }
359 0 0       0 if ($non_decreasing_from_i == $i_start) {
360 0         0 $self->{'characteristic'}->{'non_decreasing'} = 1;
361             }
362             }
363              
364 0         0 return $self;
365             }
366              
367             # # compare $x <=> $y but in strings in case they're bigger than IV or NV
368             # # my $cmp = _value_cmp ($value, $prev_value);
369             # sub _value_cmp {
370             # my ($x, $y) = @_;
371             # ### _value_cmp(): "$x $y"
372             # ### cmp: $x cmp $y
373             #
374             # my $x_neg = substr($x,0,1) eq '-';
375             # my $y_neg = substr($y,0,1) eq '-';
376             # ### $x_neg
377             # ### $y_neg
378             #
379             # return ($y_neg <=> $x_neg
380             # || ($x_neg ? -1 : 1) * (length($x) <=> length($y)
381             # || $x cmp $y));
382             # }
383              
384             sub _seek {
385 0     0   0 my ($self, $pos) = @_;
386 0 0       0 seek ($self->{'fh'}, $pos, 0)
387             or croak "Cannot seek $self->{'filename'}: $!";
388             }
389             sub _tell {
390 0     0   0 my ($self) = @_;
391 0         0 my $pos = tell $self->{'fh'};
392 0 0       0 if ($pos < 0) {
393 0         0 croak "Cannot tell file position $self->{'filename'}: $!";
394             }
395 0         0 return $pos;
396             }
397              
398             sub rewind {
399 65     65 1 178 my ($self) = @_;
400             ### OEIS-File rewind() ...
401              
402 65         381 $self->{'i'} = $self->i_start;
403 65         188 $self->{'array_pos'} = 0;
404 65         193 $self->{'next_seek'} = 0;
405             }
406              
407             sub next {
408 0     0 1 0 my ($self) = @_;
409             ### OEIS-File next(): "i=$self->{'i'}"
410              
411 0         0 my $value;
412 0 0       0 if (my $fh = $self->{'fh'}) {
413             ### from readline ...
414 0 0       0 if (defined (my $pos = delete $self->{'next_seek'})) {
415             ### seek to: $pos
416 0         0 _seek($self, $pos);
417             }
418 0         0 return _readline($self);
419              
420             } else {
421             ### from array ...
422 0 0       0 my ($value) = _array_value($self, $self->{'array_pos'}++)
423             or return;
424 0         0 return ($self->{'i'}++, $value);
425             }
426             }
427              
428             # Return $self->{'array'}->[$pos], or no values if $pos past end of array.
429             # Array values are promoted to BigInt if necessary.
430             sub _array_value {
431 0     0   0 my ($self, $pos) = @_;
432             ### _array_value(): $pos
433              
434 0         0 my $array = $self->{'array'};
435 0 0       0 if ($pos > $#$array) {
436             ### past end of array ...
437 0         0 return;
438             }
439 0         0 my $value = $array->[$pos];
440              
441             # large values as Math::BigInt
442             # initially $array has strings, make bigint objects when required
443 0 0 0     0 if (! ref $value && length($value) > _MAX_DIGIT_LENGTH) {
444 0         0 $value = $array->[$pos] = _to_bigint($value);
445             }
446             ### $value
447 0         0 return $value;
448             }
449              
450             # Read a line from an open B-file or A-file, return ($i,$value).
451             # At EOF return empty ().
452             #
453             sub _readline {
454 0     0   0 my ($self) = @_;
455 0         0 my $fh = $self->{'fh'};
456 0         0 while (defined (my $line = <$fh>)) {
457 0         0 chomp $line;
458 0         0 $line =~ tr/\r//d; # delete CR if CRLF line endings, eg. b009000.txt
459             ### $line
460              
461 0 0       0 if ($line =~ /^\s*(#|$)/) {
462             ### ignore blank or comment ...
463             # comment lines with "#" eg. b002182.txt
464 0         0 next;
465             }
466              
467             # leading whitespace allowed as per b195467.txt
468 0 0       0 if (my ($i, $value) = ($line =~ /^\s*
469             ([0-9]+) # i
470             [ \t]+
471             (-?[0-9]+) # value
472             [ \t]*
473             $/x)) {
474             ### _readline: "$i $value"
475 0 0       0 if (length($value) > _MAX_DIGIT_LENGTH) {
476 0         0 $value = _to_bigint($value);
477             }
478 0         0 $self->{'fh_i'} = $i+1;
479 0         0 return ($i, $value);
480             }
481             }
482 0         0 undef $self->{'fh_i'};
483 0         0 return;
484             }
485              
486             # Return true if the a000000.txt file in $self->{'fh'} looks good.
487             # Various a-files are source code or tables rather than sequence values.
488             #
489             sub _afile_is_good {
490 0     0   0 my ($self) = @_;
491 0         0 my $fh = $self->{'fh'};
492 0         0 my $good = 0;
493 0         0 my $prev_i;
494 0         0 while (defined (my $line = <$fh>)) {
495 0         0 chomp $line;
496 0         0 $line =~ tr/\r//d; # delete CR if CRLF line endings, eg. b009000.txt
497             ### $line
498              
499 0 0       0 if ($line =~ /^\s*(#|$)/) {
500             ### ignore blank or comment ...
501 0         0 next;
502             }
503              
504             # Must have line like "0 123". Can have negative OFFSET and so index i,
505             # eg. A166242 (though that one doesn't have an A-file).
506 0 0       0 my ($i,$value) = ($line =~ /^(-?[0-9]+) # i
507             [ \t]+
508             (-?[0-9]+) # value
509             [ \t]*
510             $/x)
511             or last;
512              
513 0 0 0     0 if (defined $prev_i && $i != $prev_i+1) {
514             ### bad A-file, initial "i" values not consecutive ...
515 0         0 last;
516             }
517 0         0 $prev_i = $i;
518              
519 0         0 $good++;
520 0 0       0 if ($good >= 3) {
521             ### three good lines, A-file is good ...
522 0         0 _seek ($self, 0);
523 0         0 return 1;
524             }
525             }
526 0         0 return 0;
527             }
528              
529             sub _read_internal_txt {
530 65     65   145 my ($self, $anum) = @_;
531             ### _read_internal_txt(): $anum
532              
533 65 100       280 return 0 if $self->{'_dont_use_internal'};
534              
535 52         229 foreach my $basefile ("$anum.internal.txt") {
536 52 50       223 my ($fullname, $contents) = _slurp_oeis_file($self,$basefile)
537             or next;
538 0 0       0 if (_HAVE_ENCODE) {
539             # "Internal" text format is utf-8.
540 0         0 $contents = Encode::decode('utf-8', $contents, Encode::FB_PERLQQ());
541             }
542              
543             ### $contents
544              
545             # eg. "%O A007318 0,5"
546 0         0 my $offset;
547 0 0       0 if ($contents =~ /^%O\s+\Q$anum\E\s+(\d+)/im) {
548 0         0 $offset = $1;
549             ### %O line: $offset
550             } else {
551 0         0 $offset = 0;
552             }
553              
554             # eg. "%N A007318 Pascal's triangle ..."
555 0 0       0 if ($contents =~ m{^%N\s+\Q$anum\E\s+(.*)}im) {
556 0         0 _set_description ($self, $1);
557             } else {
558             ### description not matched ...
559             }
560              
561             # eg. "%K A007318 nonn,tabl,nice,easy,core,look,hear,changed"
562 0   0     0 _set_characteristics ($self,
563             $contents =~ /^%K\s+\Q$anum\E\s+(.*)/im && $1);
564              
565             # the eishelp1.html says
566             # %V,%W,%X lines for signed sequences
567             # %S,%T,%U lines for non-negative sequences
568             # though now %S is signed and unsigned both is it?
569             #
570 0 0       0 if (! $self->{'fh'}) {
571 0         0 my @samples;
572             # capital %STU etc, but any case
573 0         0 while ($contents =~ m{^%[VWX]\s+\Q$anum\E\s+(.*)}mg) {
574 0         0 push @samples, $1;
575             }
576 0 0       0 unless (@samples) {
577 0         0 while ($contents =~ m{^%[STU]\s+\Q$anum\E\s+(.*)}mg) {
578 0         0 push @samples, $1;
579             }
580 0 0       0 unless (@samples) {
581 0         0 croak "Oops list of values not found in ",$self->{'filename'};
582             }
583             }
584             # join multiple lines of samples
585 0         0 _split_sample_values ($self, join(', ',@samples));
586             }
587              
588             # %O "OFFSET" is subscript of first number.
589             # Or for digit expansions it's the number of terms before the decimal
590             # point, per http://oeis.org/eishelp2.html#RO
591             #
592 0 0       0 unless ($self->{'characteristic'}->{'digits'}) {
593 0         0 $self->{'i'} = $self->{'i_start'} = $offset;
594             }
595             ### i: $self->{'i'}
596             ### i_start: $self->{'i_start'}
597              
598 0         0 return 1; # success
599             }
600              
601 52         611 return 0; # file not found
602             }
603              
604             sub _read_internal_html {
605 65     65   137 my ($self, $anum) = @_;
606             ### _read_internal_html(): $anum
607              
608 65 100       545 return 0 if $self->{'_dont_use_internal'};
609              
610 52         188 foreach my $basefile ("$anum.internal.html") {
611 52 50       114 my ($fullname, $contents) = _slurp_oeis_file($self,$basefile)
612             or next;
613             # "Internal" files are served as html with a charset indicator
614 0         0 $contents = _decode_html_charset($contents);
615             ### $contents
616              
617 0         0 my $offset;
618 0 0       0 if ($contents =~ /(^|)%O\s+(\d+)/im) {
619 0         0 $offset = $2;
620             ### %O line: $offset
621             } else {
622 0         0 $offset = 0;
623             }
624              
625 0 0       0 if ($contents =~ m{(^|)%N (.*?)(|$)}im) {
626 0         0 _set_description ($self, $2);
627             } else {
628             ### description not matched ...
629             }
630              
631 0   0     0 _set_characteristics ($self,
632             $contents =~ /(^|)%K (.*?)(|$)/im
633             && $2);
634              
635             # the eishelp1.html says
636             # %V,%W,%X lines for signed sequences
637             # %S,%T,%U lines for non-negative sequences
638             # though now %S is signed and unsigned both is it?
639             #
640 0 0       0 if (! $self->{'fh'}) {
641 0         0 my @samples;
642             # capital %STU etc, but any case
643 0         0 while ($contents =~ m{(^|<[tT][tT]>)%[VWX] (.*?)(|$)}mg) {
644 0         0 push @samples, $2;
645             }
646 0 0       0 unless (@samples) {
647 0         0 while ($contents =~ m{(^|<[tT][tT]>)%[STU] (.*?)(|$)}mg) {
648 0         0 push @samples, $2;
649             }
650 0 0       0 unless (@samples) {
651 0         0 croak "Oops list of values not found in ",$self->{'filename'};
652             }
653             }
654             # join multiple lines of samples
655 0         0 _split_sample_values ($self, join(', ',@samples));
656             }
657              
658             # %O "OFFSET" is subscript of first number.
659             # Or for digit expansions it's the number of terms before the decimal
660             # point, per http://oeis.org/eishelp2.html#RO
661             #
662 0 0       0 unless ($self->{'characteristic'}->{'digits'}) {
663 0         0 $self->{'i'} = $self->{'i_start'} = $offset;
664             }
665             ### i: $self->{'i'}
666             ### i_start: $self->{'i_start'}
667              
668 0         0 return 1; # success
669             }
670              
671 52         598 return 0; # file not found
672             }
673              
674             # Fill $self with contents of ~/OEIS/A000000.html but various fragile greps
675             # of the html.
676             # Return 1 if .html or .htm file exists, 0 if not.
677             #
678             sub _read_html {
679 65     65   116 my ($self, $anum) = @_;
680             ### _read_html(): $anum
681              
682 65 100       235 return 0 if $self->{'_dont_use_html'};
683              
684 52         189 foreach my $basefile ("$anum.html", "$anum.htm") {
685 104 50       225 my ($fullname, $contents) = _slurp_oeis_file($self,$basefile)
686             or next;
687 0         0 $contents = _decode_html_charset($contents);
688              
689 0 0       0 if ($contents =~
690             m{$anum[ \t]*\n.*? # target anum
691             ]*>\s*(?:
692             ]*> #
693             \s*
694             (.*?) # text through to ...
695             (
| or
696             }isx) {
697 0         0 _set_description ($self, $1);
698             } else {
699             ### description not matched ...
700             }
701              
702 0   0     0 my $offset = ($contents =~ /OFFSET.*?<[tT][tT]>(\d+)/s
703             && $1);
704             ### $offset
705              
706             # fragile grep out of the html ...
707 0         0 my $keywords;
708 0 0       0 if ($contents =~ m{KEYWORD.*?<[tT][tT][^>]*>(.*?)}s) {
709             ### html keywords match: $1
710 0         0 $keywords = $1;
711             } else {
712             # die "Oops, KEYWORD not matched: $anum";
713             }
714 0         0 _set_characteristics ($self, $keywords);
715              
716 0 0       0 if (! $self->{'fh'}) {
717             # fragile grep out of the html ...
718 0         0 $contents =~ s{>graph.*}{};
719 0         0 $contents =~ m{.*([^<]+)}i;
720 0         0 my $list = $1;
721 0         0 _split_sample_values ($self, $list);
722             }
723              
724             # %O "OFFSET" is subscript of first number, but for digit expansions
725             # it's the position of the decimal point
726             # http://oeis.org/eishelp2.html#RO
727 0 0       0 if (! $self->{'characteristic'}->{'digits'}) {
728 0         0 $self->{'i'} = $self->{'i_start'} = $offset;
729             }
730             ### i: $self->{'i'}
731             ### i_start: $self->{'i_start'}
732              
733 0         0 return 1;
734             }
735 52         222 return 0;
736             }
737              
738             # Return the contents of ~/OEIS/$filename.
739             # $filename is like "A000000.html" to be taken relative to oeis_dir().
740             # If $filename cannot be read then return undef.
741             sub _slurp_oeis_file {
742 208     208   357 my ($self,$filename) = @_;
743 208         643 $filename = File::Spec->catfile (oeis_dir(), $filename);
744             ### $filename
745              
746 208 50       17882 if (! open FH, "< $filename") {
747             ### cannot open file: $!
748 208         1857 return;
749             }
750 0           my $contents = do { local $/; }; # slurp
  0            
  0            
751 0 0         close FH
752             or return;
753 0   0       $self->{'filename'} ||= $filename;
754 0           return ($filename, $contents);
755             }
756              
757             sub _set_description {
758 0     0     my ($self, $description) = @_;
759             ### _set_description(): $description
760              
761 0           $description =~ s/\s+$//; # trailing whitespace
762 0           $description =~ s/\s+/ /g; # collapse whitespace
763 0           $description =~ s/<[^>]*?>//sg; # tags
764 0           $description =~ s/</
765 0           $description =~ s/>/>/ig; # unentitize >
766 0           $description =~ s/&/&/ig; # unentitize &
767 0           $description =~ s/&#(\d+);/chr($1)/ge; # unentitize numeric ' and "
  0            
768              
769             # ENHANCE-ME: maybe __x() if made available, or an sprintf "... %s" would
770             # be enough ...
771 0           $description .= "\n";
772 0 0         if ($self->{'fh'}) {
773 0           $description .= sprintf(Math::NumSeq::__('Values from B-file %s'),
774             $self->{'filename'})
775             } else {
776 0           $description .= sprintf(Math::NumSeq::__('Values from %s'),
777             $self->{'filename'})
778             }
779 0           $self->{'description'} = $description;
780             }
781              
782             sub _set_characteristics {
783 0     0     my ($self, $keywords) = @_;
784             ### _set_characteristics()
785             ### $keywords
786              
787 0 0         if (! defined $keywords) {
788 0           return; # if perhaps match of .html failed
789             }
790              
791 0           $keywords =~ s{<[^>]*>}{}g; # tags
792             ### $keywords
793              
794 0   0       foreach my $key (split /[, \t]+/, ($keywords||'')) {
795             ### $key
796 0           $self->{'characteristic'}->{"OEIS_$key"} = 1;
797             }
798              
799             # if ($self->{'characteristic'}->{'OEIS_cofr'}) {
800             # $self->{'characteristic'}->{'continued_fraction'} = 1;
801             # }
802              
803             # "cons" means decimal digits of a constant
804             # but don't reckon A000012 all-ones that way
805             # "base" means non-decimal, it seems, maybe
806 0 0 0       if ($self->{'characteristic'}->{'OEIS_cons'}
      0        
807             && ! $self->{'characteristic'}->{'OEIS_base'}
808             && $self->{'anum'} ne 'A000012') {
809 0           $self->{'values_min'} = 0;
810 0           $self->{'values_max'} = 9;
811 0           $self->{'characteristic'}->{'digits'} = 10;
812             }
813              
814 0 0         if (defined (my $description = $self->{'description'})) {
815 0 0         if ($description =~ /expansion of .* in base (\d+)/i) {
816 0           $self->{'values_min'} = 0;
817 0           $self->{'values_max'} = $1 - 1;
818 0           $self->{'characteristic'}->{'digits'} = $1;
819             }
820 0 0         if ($description =~ /^number of /i) {
821 0           $self->{'characteristic'}->{'count'} = 1;
822             }
823             }
824             }
825              
826             sub _split_sample_values {
827 0     0     my ($self, $str) = @_;
828             ### _split_sample_values(): $str
829 0 0 0       unless (defined $str && $str =~ m{^([0-9,-]|\s)+$}) {
830 0 0         croak "Oops list of sample values not recognised in ",$self->{'filename'},"\n",
831             (defined $str ? $str : ());
832             }
833 0           $self->{'array'} = [ split /[, \t\r\n]+/, $str ];
834             }
835              
836             sub _decode_html_charset {
837 0     0     my ($contents) = @_;
838              
839             # eg.
840             # HTTP::Message has a blob of code for this, using the full HTTP::Parser,
841             # but a slack regexp should be enough for OEIS pages.
842             #
843 0 0 0       if (_HAVE_ENCODE
844             && $contents =~ m{]+
845             http-equiv=[^>]+
846             content-type[^>]+
847             charset=([a-z0-9-_]+)}isx) {
848 0           return Encode::decode($1, $contents, Encode::FB_PERLQQ());
849             } else {
850 0           return $contents;
851             }
852             }
853              
854             #------------------------------------------------------------------------------
855              
856             # Similar bsearch to Search::Dict, but Search::Dict doesn't allow for
857             # comment lines at the start of the file or blank lines at the end.
858             #
859             #use Smart::Comments;
860              
861             sub ith {
862 0     0 1   my ($self, $i) = @_;
863             ### ith(): "$i cf fh_i=".($self->{'fh_i'} || -999)
864              
865 0 0         if (my $fh = $self->{'fh'}) {
866 0 0         if (! defined $self->{'next_seek'}) {
867 0           $self->{'next_seek'} = tell($fh);
868             }
869              
870 0 0 0       if (defined $self->{'fh_i'} && $i <= $self->{'fh_i'} + 20) {
871             ### fh_i is target ...
872 0 0         if (my ($line_i, $value) = _readline($self)) {
873 0 0         if ($line_i == $i) {
874 0           return $value;
875             }
876             }
877             }
878              
879 0           my $lo = 0;
880 0           my $hi = -s $fh;
881 0           for (;;) {
882             ### at: "lo=$lo hi=$hi consider mid=".int(($lo+$hi)/2)
883 0           my $mid = int(($lo+$hi)/2);
884 0           _seek ($self, $mid);
885              
886 0 0         if (! defined(readline $fh)) {
887             ### mid is EOF ...
888 0           last;
889             }
890             ### skip partial line to: tell($fh)
891 0           $mid = tell($fh);
892 0 0         if ($mid >= $hi) {
893 0           last;
894             }
895              
896 0 0         my ($line_i,$value) = _readline($self)
897             or last; # only blank lines between $mid and EOF, go linear
898              
899             ### $line_i
900             ### $value
901 0 0         if ($line_i == $i) {
902             ### found by binary search ...
903 0           return $value;
904             }
905 0 0         if ($line_i < $i) {
906             ### line_i before the target, advance lo ...
907 0           $lo = tell($fh);
908             } else {
909             ### line_i after target, reduce hi ...
910 0           $hi = $mid;
911             }
912             }
913              
914 0           _seek ($self, $lo);
915 0           for (;;) {
916 0 0         my ($line_i,$value) = _readline($self)
917             or last;
918 0 0         if ($line_i == $i) {
919             ### found by linear search ...
920 0           $self->{'fh_i'} = $line_i+1;
921 0           return $value;
922             }
923 0 0         if ($line_i > $i) {
924 0           return undef;
925             }
926             }
927 0           return undef;
928              
929             } else {
930 0           $i -= $self->i_start;
931 0 0         unless ($i >= 0) {
932 0           return undef; # negative or NaN
933             }
934 0           return $self->{'array'}->[$i];
935             }
936             }
937              
938             1;
939             __END__