File Coverage

blib/lib/Search/Dict.pm
Criterion Covered Total %
statement 46 64 71.8
branch 24 54 44.4
condition 8 14 57.1
subroutine 4 4 100.0
pod 0 1 0.0
total 82 137 59.8


line stmt bran cond sub pod time code
1             package Search::Dict;
2             require 5.000;
3             require Exporter;
4              
5             my $fc_available;
6             BEGIN {
7 1     1   32819 $fc_available = '5.015008';
8 1 50       7 if ( $] ge $fc_available ) {
9 1         9 require feature;
10 1         117 'feature'->import('fc'); # string avoids warning on old Perls
11             }
12             }
13              
14 1     1   6 use strict;
  1         2  
  1         1104  
15              
16             our $VERSION = '1.07';
17             our @ISA = qw(Exporter);
18             our @EXPORT = qw(look);
19              
20             =head1 NAME
21              
22             Search::Dict - look - search for key in dictionary file
23              
24             =head1 SYNOPSIS
25              
26             use Search::Dict;
27             look *FILEHANDLE, $key, $dict, $fold;
28              
29             use Search::Dict;
30             look *FILEHANDLE, $params;
31              
32             =head1 DESCRIPTION
33              
34             Sets file position in FILEHANDLE to be first line greater than or equal
35             (stringwise) to I<$key>. Returns the new file position, or -1 if an error
36             occurs.
37              
38             The flags specify dictionary order and case folding:
39              
40             If I<$dict> is true, search by dictionary order (ignore anything but word
41             characters and whitespace). The default is honour all characters.
42              
43             If I<$fold> is true, ignore case. The default is to honour case.
44              
45             If there are only three arguments and the third argument is a hash
46             reference, the keys of that hash can have values C, C, and
47             C or C (see below), and their corresponding values will be
48             used as the parameters.
49              
50             If a comparison subroutine (comp) is defined, it must return less than zero,
51             zero, or greater than zero, if the first comparand is less than,
52             equal, or greater than the second comparand.
53              
54             If a transformation subroutine (xfrm) is defined, its value is used to
55             transform the lines read from the filehandle before their comparison.
56              
57             =cut
58              
59             sub look {
60 7     7 0 9595 my($fh,$key,$dict,$fold) = @_;
61 7         11 my ($comp, $xfrm);
62 7 50 33     37 if (@_ == 3 && ref $dict eq 'HASH') {
63 0         0 my $params = $dict;
64 0         0 $dict = 0;
65 0 0       0 $dict = $params->{dict} if exists $params->{dict};
66 0 0       0 $fold = $params->{fold} if exists $params->{fold};
67 0 0       0 $comp = $params->{comp} if exists $params->{comp};
68 0 0       0 $xfrm = $params->{xfrm} if exists $params->{xfrm};
69             }
70 7 50   53   43 $comp = sub { $_[0] cmp $_[1] } unless defined $comp;
  53         135  
71 7         172 local($_);
72 7         20 my $fno = fileno $fh;
73 7         15 my @stat;
74 7 100 66     41 if ( defined $fno && $fno >= 0 && ! tied *{$fh} ) { # real, open file
  5   100     185  
75 4         6 @stat = eval { stat($fh) }; # in case fileno lies
  4         234  
76             }
77 7         183 my($size, $blksize) = @stat[7,11];
78 7 100       17 $size = do { seek($fh,0,2); my $s = tell($fh); seek($fh,0,0); $s }
  3         9  
  3         15  
  3         9  
  3         9  
79             unless defined $size;
80 7   100     25 $blksize ||= 8192;
81 7 100       184 $key =~ s/[^\w\s]//g if $dict;
82 7 100       25 if ( $fold ) {
83 3 50       13 $key = $] ge $fc_available ? fc($key) : lc($key);
84             }
85             # find the right block
86 7         17 my($min, $max) = (0, int($size / $blksize));
87 7         7 my $mid;
88 7         20 while ($max - $min > 1) {
89 0         0 $mid = int(($max + $min) / 2);
90 0 0       0 seek($fh, $mid * $blksize, 0)
91             or return -1;
92 0 0       0 <$fh> if $mid; # probably a partial line
93 0         0 $_ = <$fh>;
94 0 0       0 $_ = $xfrm->($_) if defined $xfrm;
95 0         0 chomp;
96 0 0       0 s/[^\w\s]//g if $dict;
97 0 0       0 if ( $fold ) {
98 0 0       0 $_ = $] ge $fc_available ? fc($_) : lc($_);
99             }
100 0 0 0     0 if (defined($_) && $comp->($_, $key) < 0) {
101 0         0 $min = $mid;
102             }
103             else {
104 0         0 $max = $mid;
105             }
106             }
107             # find the right line
108 7         17 $min *= $blksize;
109 7 50       87 seek($fh,$min,0)
110             or return -1;
111 7 50       21 <$fh> if $min;
112 7         9 for (;;) {
113 54         245 $min = tell($fh);
114 54 100       1605 defined($_ = <$fh>)
115             or last;
116 53 50       234 $_ = $xfrm->($_) if defined $xfrm;
117 53         58 chomp;
118 53 100       248 s/[^\w\s]//g if $dict;
119 53 100       88 if ( $fold ) {
120 3 50       10 $_ = $] ge $fc_available ? fc($_) : lc($_);
121             }
122 53 100       255 last if $comp->($_, $key) >= 0;
123             }
124 7         37 seek($fh,$min,0);
125 7         57 $min;
126             }
127              
128             1;