File Coverage

lib/ControlBreak.pm
Criterion Covered Total %
statement 111 120 92.5
branch 24 30 80.0
condition 2 3 66.6
subroutine 18 20 90.0
pod 12 12 100.0
total 167 185 90.2


line stmt bran cond sub pod time code
1             # ControlBreak.pm - Compare values during iteration to detect changes
2            
3             # Done:
4             # - switch from using say join to using printf in the Synopsis example
5            
6             # To Do:
7             # - provide an accumulate method that counts and sums an arbitrary number of named variables
8            
9            
10             =head1 NAME
11            
12             ControlBreak - Compare values during iteration to detect changes
13            
14             =head1 SYNOPSIS
15            
16             use v5.18;
17            
18             use ControlBreak;
19            
20             # set up two levels, in minor to major order
21             my $cb = ControlBreak->new( qw( District Country ) );
22            
23             my $country_total = 0;
24             my $district_total = 0;
25            
26             while (my $line = ) {
27             chomp $line;
28            
29             my ($country, $district, $city, $population) = split ',', $line;
30            
31             # test the values (minor to major order)
32             $cb->test($district, $country);
33            
34             # break on District (or Country) detected
35             if ($cb->break('District')) {
36             printf "%s,%s,%d%s\n", $cb->last('Country'), $cb->last('District'), $district_total, '*';
37             $district_total = 0;
38             }
39            
40             # break on Country detected
41             if ($cb->break('Country')) {
42             printf "%s total,%s,%d%s\n", $cb->last('Country'), '', $country_total, '**';
43             $country_total = 0;
44             }
45            
46             $country_total += $population;
47             $district_total += $population;
48             }
49             continue {
50             # save the current values (as received by ->test) as the new
51             # 'last' values on the next iteration.
52             $cb->continue();
53             }
54            
55             # simulate break at end of data, if we iterated at least once
56             if ($cb->iteration > 0) {
57             printf "%s,%s,%d%s\n", $cb->last('Country'), $cb->last('District'), $district_total, '*';
58             printf "%s total,%s,%d%s\n", $cb->last('Country'), '', $country_total, '**';
59             }
60            
61             __DATA__
62             Canada,Alberta,Calgary,1019942
63             Canada,Ontario,Ottawa,812129
64             Canada,Ontario,Toronto,2600000
65             Canada,Quebec,Montreal,1704694
66             Canada,Quebec,Quebec City,531902
67             Canada,Quebec,Sherbrooke,161323
68             USA,Arizona,Phoenix,1640641
69             USA,California,Los Angeles,3919973
70             USA,California,San Jose,1026700
71             USA,Illinois,Chicago,2756546
72             USA,New York,New York City,8930002
73             USA,New York,Buffalo,281757
74             USA,Pennsylvania,Philadelphia,1619355
75             USA,Texas,Houston,2345606
76            
77             =head1 DESCRIPTION
78            
79             The B module provides a class that is used to detect
80             control breaks; i.e. when a value changes.
81            
82             Typically, the data being retrieved or iterated over is ordered and
83             there may be more than one value that is of interest. For example
84             consider a table of population data with columns for country,
85             district and city, sorted by country and district. With this module
86             you can create an object that will detect changes in the district or
87             country, considered level 1 and level 2 respectively. The calling
88             program can take action, such as printing subtotals, whenever level
89             changes are detected.
90            
91             Ordered data is not a requirement. An example using unordered data
92             would be counting consecutive numbers within a data stream; e.g. 0 0
93             1 1 1 1 0 1 1. Using ControlBreak you can detect each change and
94             count the consecutive values, yielding two zeros, four 1's, one zero,
95             and two 1's.
96            
97             Note that ControlBreak cannot detect the end of your data stream.
98             The B method is normally called within a loop to detect changes
99             in control variables, but once the last iteration is processed there
100             are no further calls to B as the loop ends. It may be necessary,
101             therefore, to do additional processing after the loop in order to
102             handle the very last data group; e.g. to print a final set of subtotals.
103            
104             To simplify this situation, method B can be used in
105             place of B and B.
106            
107             =cut
108            
109             ########################################################################
110             # perlcritic rules
111             ########################################################################
112            
113             ## no critic [ProhibitSubroutinePrototypes]
114            
115             # due to use of postfix dereferencing, we have to disable these warnings
116             ## no critic [References::ProhibitDoubleSigils]
117            
118             # perlcritic wants POD sections like VERSION, DIAGNOSTICS, CONFIGURATION AND ENVIRONMENT,
119             # and INCOMPATIBILITIES. But so far these are unneeded so we'll disable these warnings
120             # here so that perlcritic gives the module a clean bill of health.
121            
122             ## no critic (Documentation::RequirePodSections)
123            
124             ########################################################################
125             # Libraries and Features
126             ########################################################################
127 11     11   701222 use strict;
  11         102  
  11         269  
128 11     11   45 use warnings;
  11         17  
  11         211  
129 11     11   97 use v5.18;
  11         41  
130            
131 11     11   5094 use Object::Pad 0.66 qw( :experimental(init_expr) );
  11         98501  
  11         41  
132            
133             package ControlBreak;
134             class ControlBreak 1.00;
135            
136 11     11   4061 use Carp qw(croak);
  11         19  
  11         30205  
137            
138             # public attributes
139 17     17 1 176 field $iteration :reader { 0 }; # [0] counts iterations
  17         40  
140 0     0 1 0 field @level_names :reader; # [1] list of level names
  0         0  
141            
142             # private attributes
143             field $_num_levels; # [2] the number of control levels
144             field %_levname { }; # [3] map of levidx to levname
145             field %_levidx { }; # [4] map of lenname to levidx
146             field %_comp_op; # [5] comparison operators
147             field %_fcomp; # [6] comparison functions
148             field $_test_levelnum { 0 }; # [7] last level returned by test()
149             field $_test_levelname { '' }; # [8] last level returned by test()
150             field @_test_values; # [9] the values of the current test()
151             field @_last_values; # [10] the values from the previous test()
152             field $_continue_count { 0 }; # [11] the number of types continue was called
153            
154             =head1 FIELDS
155            
156             =head2 iteration
157            
158             A readonly field that provides the current iteration number.
159            
160             This can be useful if you are doing an final processing after an
161             iteration loop has ended. In the event that the data stream is empty
162             and there were no iterations, then you can condition your final
163             processing on iteration > 0.
164            
165             Note that B is incremented by B (or ).
166             Therefore, when called wihtin a loop it is effectively zero-based if
167             referenced within the iteration block before B is invoked, and
168             then one-based after B.
169            
170             =head2 level_names
171            
172             A readonly field that provides a list of the level names that were
173             provided as arguments to new().
174            
175             =cut
176            
177             ######################################################################
178             # Constructor (a.k.a. the new() method)
179             ######################################################################
180            
181             =head1 METHODS
182            
183             =head2 new ( [, ]... )
184            
185             Create a new ControlBreak object.
186            
187             Arguments are user-defined names for each level, in minor to major
188             order. The set of names must be unique, and they must each start
189             with a letter or underscore, followed by any number of letters,
190             numbers or underscores.
191            
192             A level name can also begin with a '+', which denotes that a numeric
193             comparison will be used for the values processed at this level.
194            
195             The number of arguments to new() determines the number of control levels
196             that will be monitored. The variables provided to method test() must
197             match in number and datatype to these operators.
198            
199             The order of the arguments corresponds to a hierachical level of
200             control, from lowest to highest; i.e. the first argument corresponds
201             to level 1, the second to level 2, etc. This also corresponds
202             to sort order, from minor to major, when iterating through a data stream.
203            
204             =cut
205            
206             BUILD {
207             croak '*E* at least one argument is required'
208             if @_ == 0;
209            
210             foreach my $arg (@_) {
211             croak '*E* invalid level name'
212             unless $arg =~ m{ \A [+]? [[:alpha:]_]\w+ }xms;
213             }
214            
215             $_num_levels = @_;
216            
217             my %lev_count;
218            
219             foreach my $arg (@_) {
220             $lev_count{$arg}++;
221             croak '*E* duplicate level name: ' . $arg
222             if $lev_count{$arg} > 1;
223             my $level_name = $arg;
224             my $is_numeric = $level_name =~ s{ \A [+] }{}xms;
225             push @level_names, $level_name;
226             my $op = $is_numeric ? '==' : 'eq';
227             $_comp_op{$level_name} = $op;
228             $_fcomp{$level_name} = _op_to_func($op);
229             }
230            
231             @_last_values = ( undef ) x $_num_levels;
232            
233             my $ii = 0;
234             map { $_levname{$ii++} = $_ } @level_names;
235            
236             $ii = 0;
237             map { $_levidx{$_} = $ii++ } @level_names;
238             }
239            
240             ######################################################################
241             # Public methods
242             ######################################################################
243            
244             =head2 break ( [ ] )
245            
246             The B method provides a convenient way to check whether the last
247             invocation of the test method resulted in a control break, or a
248             control break greater than or equal to the optionally
249             provided as an argument.
250            
251             For example, if you have levels 'City', 'State' and 'Country', and
252             there's a control break on level 1 (City), then invoking B
253             will return 1 and therefore be treated as true within a condition.
254             If there was no control break, then 0 (false) is returned.
255            
256             When invoked with a level name argument, B will map the level name
257             to a level number and compare it to the control break level determined
258             by the last invocation of test(). If the tested control break level
259             number is equal or higher than the argument level, then that level
260             number is returned and, since it will be non-zero, treated as a true
261             value within a condition. Otherwise, zero (false) is returned.
262            
263             Ultimately the point of this is that you can use it to write a series
264             of actions, like printing subtotals and clearing subtotal variables,
265             such that a higher level control break will trigger actions
266             associated with lower level control breaks. For example:
267            
268             my $cb = ControlBreak( qw/City State Country/ );
269            
270             if ( $cb->break() ) {
271             say '=== control break detected at level: ' . $cb->levelname;
272             }
273             if ( $cb->break('City') ) {
274             say "City total: $city";
275             $city = 0;
276             }
277             if ( $cb->break('State') ) {
278             say "State total: $state";
279             $state = 0;
280             }
281             if ( $cb->break('Country') ) {
282             say "Country total: $country";
283             $country = 0;
284             }
285            
286             In this example, when a Country control break is detected all three
287             subtotals will be printed. When a State control break is detected,
288             only State and City will print.
289            
290             =cut
291            
292 166     166 1 633 method break ( $level_name=undef ) {
  166         170  
  166         185  
  166         158  
293 166 50       229 if ($level_name) {
294             croak '*E* invalid level name: ' . $level_name
295 166 50       245 unless exists $_levidx{$level_name};
296 166         195 my $levnum = $_levidx{$level_name} + 1;
297 166         304 return $_test_levelnum >= $levnum;
298             }
299            
300 0         0 return $_test_levelnum;
301             }
302            
303             =head2 comparison ( level_name => [ 'eq' | '==' | sub ] ... )
304            
305             The B method accepts a hash which sets the comparison
306             operations for the designated levels. Keywords must match the level
307             names provide in new(). Values can be '==' for numeric comparison,
308             'eq' for alpha comparison, or anonymous subroutines.
309            
310             Anonymous subroutines must take two arguments, compare them in some
311             fashion, and return a boolean. The first argument to the comparison
312             routine will be the value passed to the test() method. The second
313             argument will be the corresponding value from the last iteration.
314            
315             All levels are provided with default comparison functions as determined
316             by new(). This method is provided so you can change one or more of
317             those defaults. Any level name not referenced by keys in the
318             argument list will be left unchanged.
319            
320             Some handy comparison functions are:
321            
322             # case-insensitive match
323             sub { lc $_[0] eq lc $_[1] }
324            
325             # strings coerced to numbers (so 07 and 7 are equal)
326             sub { ($_[0] + 0) == ($_[1] + 0) }
327            
328             # blank values treated as matched
329             sub { $_[0] eq '' ? 1 : $_[0] eq $_[1] }
330            
331             =cut
332            
333 6     6 1 645 method comparison (%h) {
  6         10  
  6         23  
  6         10  
334 6         42 while ( my ($level_name, $v) = each %h ) {
335             croak '*E* invalid level name: ' . $level_name
336 7 100       36 unless exists $_levidx{$level_name};
337 5         9 $_comp_op{$level_name} = $v;
338 5         13 $_fcomp{$level_name} = _op_to_func($v);
339             }
340             }
341            
342             =head2 continue ()
343            
344             Saves the values most recently provided to the test() method so they
345             can be compared to new values on the next iteration.
346            
347             On the next iteration these values will be accessible via the last()
348             method.
349            
350             B is best invoked within the continue block of a loop, to
351             make sure it isn't missed.
352            
353             B cannot be used in conjunction with B, which
354             internally calls B and B for you.
355            
356             =cut
357            
358 138     138 1 12632 method continue () {
  138         152  
  138         136  
359 138         281 @_last_values = @_test_values;
360 138         429 $_continue_count++;
361             }
362            
363             =head2 last ($level)
364            
365             Returns the value (of the corresponding level) that was given to the
366             B method called prior to the most recent one.
367            
368             The argument can be a level name or a level number.
369            
370             Normally this is used while iterating through a data stream. When a
371             level change (i.e. control break) is detected, the current data value
372             has changed relative to the preceding iteration. At this point it
373             may be necessary to take some action, such a printing a subtotal.
374             But, the subtotal will be for the preceding group of data and the
375             current value belongs to the next group. The B method allows
376             you to access the value for the group that was just processed so, for
377             example, the group name can be included on the subtotal line.
378            
379             For example, if control levels were named 'X' and 'Y' and you are
380             iterating through data and invoking test($x, $y) at each iteration,
381             then invoking $cb->last('Y') on iteration 9 will returns the value of
382             $y on iteration 8.
383            
384             Note that B should not be invoked before last() within the
385             scope of an iteration loop; i.e. continue() should be the last thing
386             done before the next turn of the loop.
387            
388             =cut
389            
390 88     88 1 1381 method last ($arg) {
  88         91  
  88         98  
  88         83  
391 88         88 my $retval;
392            
393 88 100       199 if ( $arg =~ m{ \A \d+ \Z }xms ) {
394             croak '*E* invalid level number: ' . $arg
395 1 50       14 unless exists $_levname{$arg};
396 0         0 $retval = $_last_values[$arg];
397             } else {
398             croak '*E* invalid level name: ' . $arg
399 87 100       159 unless exists $_levidx{$arg};
400 86         117 $retval = $_last_values[$_levidx{$arg}];
401             }
402            
403 86         951 return $retval;
404             }
405            
406             =head2 levelname
407            
408             Return the level name for the most recent invocation of the B
409             method.
410            
411             =cut
412            
413 0     0 1 0 method levelname () {
  0         0  
  0         0  
414 0         0 return $_test_levelname;
415             }
416            
417             =head2 levelnum
418            
419             Return the level number for the most recent invocation of the B
420             method.
421            
422             =cut
423            
424 86     86 1 292 method levelnum () {
  86         87  
  86         87  
425 86         144 return $_test_levelnum;
426             }
427            
428            
429             =head2 level_numbers
430            
431             Return a list of level numbers corresponding to the levels defined
432             in B. This can be useful, for example, when you want to
433             set up some lexical variables for use as indexes into a list you
434             might use to accumulate subtotals.
435            
436             my $cb = ControlBreak->new( qw( L1 L2 EOD ) );
437             my @totals;
438             my ($L1, $L2, $EOD) = $cb->level_numbers;
439            
440             foreach my $sublist (@list_of_lists) {
441             my ($control1, $control2, $number) = $sublist->@*;
442             ...
443             my $sub_totals = sub {
444             if ($cb->break('L1')) {
445             # report the L1 subtotal here
446             $totals[$L1] = 0; # clear the subtotal
447             }
448             ...
449             # accumulate subtotals
450             map { $totals[$_] += $number } $cb->level_numbers;
451             };
452            
453             $cb->test_and_do(
454             $control1,
455             $control2,
456             $cb->iteration == $list_of_lists - 1,
457             $sub_totals
458             );
459             }
460            
461            
462             =cut
463            
464 32     32 1 129 method level_numbers () {
  32         36  
  32         30  
465 32         70 return 1 .. $_num_levels;
466             }
467            
468             =head2 reset
469            
470             Resets the state of the object so it can be used again for another
471             set of iterations using the same number and type of controls
472             establish when the object was instantiated with new(). Any
473             comparisons that were subsequently modified are retained.
474            
475             =cut
476            
477 1     1 1 6 method reset () {
  1         1  
  1         2  
478 1         1 $iteration = 0;
479 1         2 $_continue_count = 0;
480 1         2 $_test_levelnum = 0;
481 1         1 $_test_levelname = 0;
482 1         2 @_test_values = ();
483 1         3 @_last_values = ( undef ) x $_num_levels;
484             }
485            
486             =head2 test ( $var1 [, $var2 ]... )
487            
488             Submits the control variables for testing against the values from the
489             previous iteration.
490            
491             Testing is done in reverse order, from highest to lowest (major to
492             minor) and stops once a change is detected. Where it stops determines
493             the control break level. For example, if $var2 changed, method
494             levelnum will return 2. If $var2 did not change, but $var1 did, then
495             method B will return 1. If nothing changes, then
496             B will return 0.
497            
498             Note that the level numbers set by B are true if there was
499             a level change, and false if there wasn't. So, they can be used as a
500             simple boolean test of whether there was a change. Or you can use
501             the B method to determine whether any control break has occured.
502            
503             Because level numbers correspond to the hierachy of data order, they
504             can be use to trigger multiple actions; e.g. B >= 1 could be
505             used to print subtotals for levels 1 whenever a control break occured
506             for level 1, 2 or 3. It is usually the case that higher control
507             breaks are meant to cascade to lower control levels and this can be
508             achieved in this fashion. The B method simplifies this.
509            
510             Note that method B must be called at the end of each
511             iteration in order to save the values of the iteration for the next
512             iteration. If not, the next B invocation will croak.
513            
514             =cut
515            
516 142     142 1 3474 method test (@args) {
  142         150  
  142         205  
  142         153  
517 142 100       285 croak '*E* number of arguments to test() must match those given in new()'
518             if @args != $_num_levels;
519            
520 140 100       246 croak '*E* continue() must be called after test()'
521             unless $iteration == $_continue_count;
522            
523 139         223 @_test_values = @args;
524            
525 139         149 $iteration++;
526            
527 139         140 my $is_break;
528 139         144 my $lev_idx = 0;
529            
530             # process tests in reverse order of arguments; i.e. major to minor
531 139         154 my $jj = @args;
532 139         202 foreach my $arg (reverse @args) {
533 244         269 $jj--;
534            
535             # on the first iteration, make the last values match the current
536             # ones so we don't detect any control break
537 244 100 66     410 $_last_values[$jj] //= $arg
538             if $iteration == 1;
539            
540 244         308 my $level_name = $_levname{$jj};
541            
542             # compare the current and last values using the comparison function
543             # if they don't match, then it's a control break
544 244         383 $is_break = not $_fcomp{$level_name}->( $arg, $_last_values[$jj] );
545            
546 244 100       502 if ( $is_break ) {
547             # internally our lists use the usual zero-based indexing
548             # but externally our level numbers are 1-based, where
549             # 1 is the most minor control variable. Level 0 is used
550             # to denote no level; i.e. no control break. Since zero
551             # is treated as false by perl, and non-zero as true, we
552             # can use the level number in a condition to determine if
553             # there's been a control break; ie. $level ? 'break' : 'no break'
554 71         84 $lev_idx = $jj + 1;
555 71         92 last;
556             }
557             }
558 139         162 my $lev_num = $lev_idx;
559            
560 139         167 $_test_levelnum = $lev_num;
561 139         199 $_test_levelname = $_levname{$jj};
562            
563 139         253 return;
564             }
565            
566             =head2 test_and_do ( $var1 [, $var2 ]... $var_end, $coderef )
567            
568             The B method is similar to B. It takes the same
569             arguments as B, plus one additional argument that is an
570             anonymous code reference. Internally, it calls B and then, if
571             there is a control break, calls the anonymous subroutine provided in
572             the last argument. Typically, that code will perform work related to
573             subtotals or other actions necessary when a control break occurs.
574            
575             But B does one other thing. It expects the last control
576             variable ($var_end) to be an end of data indicator, such as the perl
577             builtin operator B. This indicator should return false on each
578             iteration over the data until the very last iteration -- when it
579             should change to true, thereby triggering a major control break.
580            
581             What test_and_do does then is to add an extra loop. This simulates
582             a final record and will trigger B to signal control breaks
583             at all levels. Thus, the code provided will be executed between
584             every change of data AND after all data has been iterated over.
585            
586             This avoids the necessity of repeating the control break actions
587             you've put inside the data loop immediately after the loop's closing
588             bracket. When you just use B and B, an end-of-data
589             control break won't occur and the simplist workaround is to just
590             duplicate your control break code after the loops closing bracket.
591            
592             Here's a typical use case involving end of file processing. Note the
593             extra control level, named 'EOF', and the use of the B builtin
594             function as the second last argument of B:
595            
596             my $cb = ControlBreak->new( qw( L1 L2 EOF ) );
597            
598             my $lev1_subtotal = 0;
599             my $lev2_subtotal = 0;
600             my $grand_total = 0;
601            
602             while (my $line = <>) {
603             chomp $line;
604            
605             my ($lev1, $lev2, $data) = split "\t", $line;
606            
607             my $subtotal_coderef = sub {
608             if ($cb->break('L1')) {
609             say $cb->last('L1'), $cb->last('L2'), $lev1_subtotal . '*';
610             $lev1_subtotal = 0;
611             }
612             ...
613             if ($cb->break('EOF')) {
614             say 'Grand total,,', $grand_total, '***';
615             }
616            
617             $lev1_subtotal += $data;
618             $lev2_subtotal += $data;
619             $gran_total += $data;
620             }
621            
622             $cb->test_and_do($lev1, $lev2, eof, $subtotal_coderef);
623             }
624            
625             Also note that if your subroutine needs to reference variables
626             defined outside the scope of the loop (as in this case with the
627             totalling variables) then it needs to be defined within the loop so
628             it can be a closure over the variables in the enclosing scope.
629            
630             Another typical use case involves iterating over a list of values.
631             Here, we have no built in function to tell us when we've reached the
632             final value, but if we have a fixed list of values we can use the
633             length of the list and test it against the value returned by the
634             ControlBreak iterator method. For example:
635            
636             my $cb = ControlBreak->new( qw( L1 L2 EOD ) );
637            
638             my $lev1_subtotal = 0;
639             my $lev2_subtotal = 0;
640             my $grand_total = 0;
641            
642             my $last_iter = @data - 1;
643            
644             foreach my $line (@data {
645             chomp $line;
646             my ($lev1, $lev2, $data) = split "\t", $line;
647            
648             my $subtotal_coderef = sub {
649             if ($cb->break('L1')) {
650             say $cb->last('L1'), $cb->last('L2'), $lev1_subtotal . '*';
651             $lev1_subtotal = 0;
652             }
653             ...
654             if ($cb->break('EOD')) {
655             say 'Grand total,,', $grand_total, '***';
656             }
657            
658             $lev1_subtotal += $data;
659             $lev2_subtotal += $data;
660             $gran_total += $data;
661             }
662            
663             $cb->test_and_do($lev1, $lev2, $cb->iteration == $last_iter, $subtotal_coderef);
664             }
665            
666             =cut
667            
668 28     28 1 166 method test_and_do (@args) {
  28         31  
  28         56  
  28         29  
669            
670 28 50       49 croak '*E* test_and_do must have one more argument than new()'
671             unless @args == $_num_levels + 1;
672            
673 28         42 my $coderef = pop @args;
674 28         35 my $eod = 0 + $args[-1];
675            
676 28 50       48 croak '*E* last argument of test_and_do must be a code reference'
677             unless ref $coderef eq 'CODE';
678            
679 28         45 for my $ii (0..$eod) {
680 30         35 $args[-1] = $ii;
681 30         54 $self->test(@args);
682 30         57 $coderef->();
683 30         169 $self->continue;
684             }
685            
686             }
687            
688             ######################################################################
689             # Private subroutines and functions
690             ######################################################################
691 35     35   41 sub _op_to_func ($op) {
  35         47  
  35         38  
692            
693 35         40 my $fcompare;
694            
695 35 100       92 if ($op eq '==') {
    100          
    50          
696 4     32   13 $fcompare = sub { $_[0] == $_[1] };
  32         66  
697             }
698             elsif ($op eq 'eq') {
699 27     177   83 $fcompare = sub { $_[0] eq $_[1] };
  177         291  
700             }
701             elsif (ref $op eq 'CODE') {
702 4         6 $fcompare = $op;
703             }
704             else {
705 0         0 croak '*F* invalid comparison operator: ' . $op;
706             }
707            
708 35         102 return $fcompare;
709             }
710            
711             1;
712            
713             __END__