File Coverage

blib/lib/Data/TableData/Lookup.pm
Criterion Covered Total %
statement 58 69 84.0
branch 16 26 61.5
condition 5 9 55.5
subroutine 5 5 100.0
pod 1 1 100.0
total 85 110 77.2


line stmt bran cond sub pod time code
1             package Data::TableData::Lookup;
2              
3 1     1   60491 use 5.010001;
  1         13  
4 1     1   5 use strict;
  1         2  
  1         21  
5 1     1   5 use warnings;
  1         2  
  1         44  
6              
7 1     1   5 use Exporter 'import';
  1         2  
  1         579  
8              
9             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
10             our $DATE = '2022-05-06'; # DATE
11             our $DIST = 'Data-TableData-Lookup'; # DIST
12             our $VERSION = '0.003'; # VERSION
13              
14             our @EXPORT_OK = qw(table_vlookup);
15              
16             our %SPEC;
17              
18             $SPEC{table_vlookup} = {
19             v => 1.1,
20             summary => 'Look up value in a table row by row',
21             description => <<'_',
22              
23             This routine looks up value in a table row by row. It is similar to the
24             spreadsheet function VLOOKUP, hence the same name being used. It is basically a
25             glorified map()+grep() that returns a single value (or you can also say it's a
26             glorified map+L::first()).
27              
28             Given a table, which is either an array-of-arrayrefs (aoa) or array-of-hashrefs
29             (aoh), this routine will run through it row by row until it finds the value that
30             you want. Once found, the value will be returned. Otherwise, undef is returned.
31              
32             **Exact matching**
33              
34             The table is expected to be sorted in ascending order by the lookup field. You
35             specify a lookup value, which will be looked up in the lookup field. Once the
36             value is found, the result field of the correspending row is returned and lookup
37             is completed. When the lookup field already exceeds the lookup value, the
38             routine also concludes that the value is not found, and the lookup is completed.
39              
40             Example:
41              
42             table => [
43             {min_income=> 0, tax_rate=>0.13},
44             {min_income=> 8_000, tax_rate=>0.18},
45             {min_income=> 15_000, tax_rate=>0.22},
46             {min_income=> 35_000, tax_rate=>0.30},
47             {min_income=> 85_000, tax_rate=>0.39},
48             {min_income=>140_000, tax_rate=>0.45},
49             ],
50             lookup_field => 'min_income',
51             lookup_value => 35_000,
52             result_field => 'tax_rate',
53              
54             will result in:
55              
56             0.304
57              
58             while if the lookup_value is 40_000, undef will be returned since it is not
59             found in any row of the table.
60              
61             **Approximate matching**
62              
63             If `approx` option is set to true, once the lookup field in a row exceeds the
64             lookup value, the result field of the previous row will be returned (if any).
65             For example, if lookup value is 40_000 then 0.30 will be returned (the row where
66             `min_income` is 35_000) since the next row has `min_income` of 85_000 which
67             already exceeds 40_000.
68              
69             **Interpolation of result**
70              
71             If, additionally, `interpolate` option is also set to true in addition to
72             `approx` option being set to true, a linear interpolation will be done when an
73             exact match fails. In the previous example, when lookup value is 40_000, 0.309
74             will be returned, which is calculated with:
75              
76             0.3 + (40_000 - 35_000)/(85_000 - 35_000)*(0.39 - 0.30)
77              
78             In the case of there is no next row after `min_income` of 35_000, 0.30 will
79             still be returned.
80              
81              
82             _
83             args => {
84             table => {
85             schema => 'array*',
86             req => 1,
87             description => <<'_',
88              
89             Either an aoaos (array of aos's a.k.a. array-of-scalars) or aohos (array of
90             hos's a.k.a. hash of scalars), e.g.:
91              
92             # aoaos
93             [ # col1, col2, col3
94             [1,2,3],
95             [4,5,6],
96             [7,8,9], ]
97              
98             or:
99              
100             # aohos
101             [ {col1=>1, col2=>2, col3=>3},
102             {col1=>4, col2=>5, col3=>6},
103             {col1=>7, col2=>8, col3=>9}, ]
104              
105             _
106             },
107             lookup_value => {
108             summary => 'The value that you want to look up in the lookup field',
109             description => <<'_',
110              
111             Instead of `lookup_value` and `lookup_field`, you can also specify `lookup_code`
112             instead.
113              
114             _
115             schema => 'any*',
116             },
117              
118             lookup_field => {
119             summary => 'Where to look up the lookup value in',
120             description => <<'_',
121              
122             Either an integer array index (for aoaos table) or a string hash key (for aohos
123             table).
124              
125             Instead of `lookup_value` and `lookup_field`, you can also specify `lookup_code`
126             instead.
127              
128             _
129             schema => 'str*',
130             },
131              
132             lookup_code => {
133             summary => 'Supply code to match a row',
134             description => <<'_',
135              
136             Unless what you want to match is custom, you usually specify `lookup_value` and
137             `lookup_field` instead.
138              
139             The code will be passed the row (which is an arrayref or a hashref) and
140             optionally the lookup value too as the second argument if the lookup value is
141             specified. It is expected to return either -1, 0, 1 like the Perl's `cmp` or
142             `<=>` operator. -1 means the lookup field is less than the lookup value, 0 means
143             equal, and 1 means greater than.
144              
145             With `approx` option not set to true, lookup will succeed once 0 is returned.
146             With `approx` set to true, lookup will succeed once 0 or 1 is returned.
147              
148             _
149             schema => 'code*',
150             },
151              
152             result_field => {
153             summary => 'Where to get the result from',
154             schema => 'str*',
155             description => <<'_',
156              
157             Either an integer array index (for aoa table) or a string hash key (for aoh
158             table).
159              
160             _
161             req => 1,
162             },
163              
164             # XXX result_code (instead of result_field)
165              
166             approx => {
167             summary => 'Whether to do an approximate instead of an exact match',
168             schema => 'bool*',
169             description => <<'_',
170              
171             See example in the function description.
172              
173             _
174             },
175             interpolate => {
176             summary => 'Do a linear interpolation',
177             schema => 'bool*',
178             description => <<'_',
179              
180             When this option is set to true, will do a linear interpolation of result when
181             an exact match is not found. This will only be performed if `approx` is also set
182             to true.
183              
184             See example in the function description.
185              
186             Currently, you cannot use `interpolate` with `lookup_code`.
187              
188             _
189             },
190             },
191             args_rels => [
192             'choose_all&' => [
193             [qw/lookup_field lookup_value/],
194             ],
195             'req_one&' => [
196             [qw/lookup_field lookup_code/],
197             ],
198             'dep_any&' => [
199             ['interpolate' => ['approx']],
200             ],
201             ],
202             result_naked => 1,
203             };
204             sub table_vlookup {
205 13     13 1 157 my %args = @_;
206              
207 13         22 my $table = $args{table};
208 13         17 my $approx = $args{approx};
209 13         20 my $interpolate = $args{interpolate};
210 13         14 my $lookup_value = $args{lookup_value};
211 13         19 my $lookup_field = $args{lookup_field};
212 13         18 my $lookup_code = $args{lookup_code};
213 13         17 my $lookup_value_specified = exists $args{lookup_code};
214 13         17 my $result_field = $args{result_field};
215              
216 13         15 my $ref_row;
217 13         28 my ($matching_row, $prev_row);
218 13         0 my $result;
219             ROW:
220 13         19 for my $row (@$table) {
221 24         32 $ref_row = ref $row;
222              
223 24         28 my $cmp;
224 24 50       40 if ($lookup_code) {
225 0         0 my @lcargs = ($row);
226 0 0       0 push @lcargs, $lookup_value if $lookup_value_specified;
227 0         0 $cmp = $lookup_code->(@lcargs);
228             } else {
229 24 50       35 if ($ref_row eq 'ARRAY') {
230 0         0 $cmp = $row->[$lookup_field] <=> $lookup_value;
231             } else {
232 24         49 $cmp = $row->{$lookup_field} <=> $lookup_value;
233             }
234             }
235 24 100       52 if ($cmp == 0) {
    100          
    50          
236             # an exact match
237 5         7 $matching_row = $row;
238 5         31 goto GET_EXACT_RESULT;
239             } elsif ($cmp == 1) {
240             # lookup field has exceeded lookup value
241 6 100 100     22 if ($approx && $prev_row) {
242 3 100       8 if ($interpolate) {
243 1         2 $matching_row = $row;
244 1         7 goto GET_INTERPOLATED_RESULT;
245             } else {
246 2         5 $matching_row = $prev_row;
247 2         12 goto GET_EXACT_RESULT;
248             }
249             } else {
250             # no exact match, not found
251 3         27 goto RETURN_RESULT;
252             }
253             } elsif ($cmp == -1) {
254             # lookup value has not exceeded lookup value, continue to the next
255             # row
256             } else {
257 0         0 die "Something's wrong, cmp is not -1|0|1 ($cmp)";
258             }
259 13         18 $prev_row = $row;
260             }
261              
262 2 50 33     10 if ($approx && $prev_row) {
263 2         4 $matching_row = $prev_row;
264 2         5 goto GET_EXACT_RESULT;
265             } else {
266             # not found
267 0         0 goto RETURN_RESULT;
268             }
269              
270             GET_EXACT_RESULT: {
271 9 50       11 last unless $matching_row; # sanity check
  9         20  
272 9 50       17 if ($ref_row eq 'ARRAY') {
273 0         0 $result = $matching_row->[$result_field];
274             } else {
275 9         15 $result = $matching_row->{$result_field};
276             }
277 9         20 goto RETURN_RESULT;
278             }
279              
280             GET_INTERPOLATED_RESULT: {
281 1 50 33     2 last unless $matching_row && $prev_row; # sanity check
  1         7  
282 1         3 my ($x1, $x2, $y1, $y2);
283 1 50       3 if ($ref_row eq 'ARRAY') {
284 0         0 $x1 = $prev_row ->[$lookup_field];
285 0         0 $x2 = $matching_row->[$lookup_field];
286 0         0 $y1 = $prev_row ->[$result_field];
287 0         0 $y2 = $matching_row->[$result_field];
288             } else {
289 1         2 $x1 = $prev_row ->{$lookup_field};
290 1         2 $x2 = $matching_row->{$lookup_field};
291 1         2 $y1 = $prev_row ->{$result_field};
292 1         2 $y2 = $matching_row->{$result_field};
293             }
294 1         6 $result = $y1 + ($lookup_value - $x1)/($x2-$x1)*($y2-$y1);
295             }
296              
297             RETURN_RESULT:
298 13         59 $result;
299             }
300              
301             1;
302             # ABSTRACT: Lookup value in a table data structure
303              
304             __END__