File Coverage

lib/ControlBreak.pm

Criterion	Covered	Total	%
statement	111	120	92.5
branch	24	30	80.0
condition	2	3	66.6
subroutine	18	20	90.0
pod	12	12	100.0
total	167	185	90.2

line	stmt	bran	cond	sub	pod	time	code
1							# ControlBreak.pm - Compare values during iteration to detect changes
2
3							# Done:
4							# - switch from using say join to using printf in the Synopsis example
5
6							# To Do:
7							# - provide an accumulate method that counts and sums an arbitrary number of named variables
8
9
10							=head1 NAME
11
12							ControlBreak - Compare values during iteration to detect changes
13
14							=head1 SYNOPSIS
15
16							use v5.18;
17
18							use ControlBreak;
19
20							# set up two levels, in minor to major order
21							my $cb = ControlBreak->new( qw( District Country ) );
22
23							my $country_total = 0;
24							my $district_total = 0;
25
26							while (my $line = ) {
27							chomp $line;
28
29							my ($country, $district, $city, $population) = split ',', $line;
30
31							# test the values (minor to major order)
32							$cb->test($district, $country);
33
34							# break on District (or Country) detected
35							if ($cb->break('District')) {
36							printf "%s,%s,%d%s\n", $cb->last('Country'), $cb->last('District'), $district_total, '*';
37							$district_total = 0;
38							}
39
40							# break on Country detected
41							if ($cb->break('Country')) {
42							printf "%s total,%s,%d%s\n", $cb->last('Country'), '', $country_total, '**';
43							$country_total = 0;
44							}
45
46							$country_total += $population;
47							$district_total += $population;
48							}
49							continue {
50							# save the current values (as received by ->test) as the new
51							# 'last' values on the next iteration.
52							$cb->continue();
53							}
54
55							# simulate break at end of data, if we iterated at least once
56							if ($cb->iteration > 0) {
57							printf "%s,%s,%d%s\n", $cb->last('Country'), $cb->last('District'), $district_total, '*';
58							printf "%s total,%s,%d%s\n", $cb->last('Country'), '', $country_total, '**';
59							}
60
61							__DATA__
62							Canada,Alberta,Calgary,1019942
63							Canada,Ontario,Ottawa,812129
64							Canada,Ontario,Toronto,2600000
65							Canada,Quebec,Montreal,1704694
66							Canada,Quebec,Quebec City,531902
67							Canada,Quebec,Sherbrooke,161323
68							USA,Arizona,Phoenix,1640641
69							USA,California,Los Angeles,3919973
70							USA,California,San Jose,1026700
71							USA,Illinois,Chicago,2756546
72							USA,New York,New York City,8930002
73							USA,New York,Buffalo,281757
74							USA,Pennsylvania,Philadelphia,1619355
75							USA,Texas,Houston,2345606
76
77							=head1 DESCRIPTION
78
79							The B module provides a class that is used to detect
80							control breaks; i.e. when a value changes.
81
82							Typically, the data being retrieved or iterated over is ordered and
83							there may be more than one value that is of interest. For example
84							consider a table of population data with columns for country,
85							district and city, sorted by country and district. With this module
86							you can create an object that will detect changes in the district or
87							country, considered level 1 and level 2 respectively. The calling
88							program can take action, such as printing subtotals, whenever level
89							changes are detected.
90
91							Ordered data is not a requirement. An example using unordered data
92							would be counting consecutive numbers within a data stream; e.g. 0 0
93							1 1 1 1 0 1 1. Using ControlBreak you can detect each change and
94							count the consecutive values, yielding two zeros, four 1's, one zero,
95							and two 1's.
96
97							Note that ControlBreak cannot detect the end of your data stream.
98							The B method is normally called within a loop to detect changes
99							in control variables, but once the last iteration is processed there
100							are no further calls to B as the loop ends. It may be necessary,
101							therefore, to do additional processing after the loop in order to
102							handle the very last data group; e.g. to print a final set of subtotals.
103
104							To simplify this situation, method B can be used in
105							place of B and B.
106
107							=cut
108
109							########################################################################
110							# perlcritic rules
111							########################################################################
112
113							## no critic [ProhibitSubroutinePrototypes]
114
115							# due to use of postfix dereferencing, we have to disable these warnings
116							## no critic [References::ProhibitDoubleSigils]
117
118							# perlcritic wants POD sections like VERSION, DIAGNOSTICS, CONFIGURATION AND ENVIRONMENT,
119							# and INCOMPATIBILITIES. But so far these are unneeded so we'll disable these warnings
120							# here so that perlcritic gives the module a clean bill of health.
121
122							## no critic (Documentation::RequirePodSections)
123
124							########################################################################
125							# Libraries and Features
126							########################################################################
127	11			11		701222	use strict;
	11					102
	11					269
128	11			11		45	use warnings;
	11					17
	11					211
129	11			11		97	use v5.18;
	11					41
130
131	11			11		5094	use Object::Pad 0.66 qw( :experimental(init_expr) );
	11					98501
	11					41
132
133							package ControlBreak;
134							class ControlBreak 1.00;
135
136	11			11		4061	use Carp qw(croak);
	11					19
	11					30205
137
138							# public attributes
139	17			17	1	176	field $iteration :reader { 0 }; # [0] counts iterations
	17					40
140	0			0	1	0	field @level_names :reader; # [1] list of level names
	0					0
141
142							# private attributes
143							field $_num_levels; # [2] the number of control levels
144							field %_levname { }; # [3] map of levidx to levname
145							field %_levidx { }; # [4] map of lenname to levidx
146							field %_comp_op; # [5] comparison operators
147							field %_fcomp; # [6] comparison functions
148							field $_test_levelnum { 0 }; # [7] last level returned by test()
149							field $_test_levelname { '' }; # [8] last level returned by test()
150							field @_test_values; # [9] the values of the current test()
151							field @_last_values; # [10] the values from the previous test()
152							field $_continue_count { 0 }; # [11] the number of types continue was called
153
154							=head1 FIELDS
155
156							=head2 iteration
157
158							A readonly field that provides the current iteration number.
159
160							This can be useful if you are doing an final processing after an
161							iteration loop has ended. In the event that the data stream is empty
162							and there were no iterations, then you can condition your final
163							processing on iteration > 0.
164
165							Note that B is incremented by B (or ).
166							Therefore, when called wihtin a loop it is effectively zero-based if
167							referenced within the iteration block before B is invoked, and
168							then one-based after B.
169
170							=head2 level_names
171
172							A readonly field that provides a list of the level names that were
173							provided as arguments to new().
174
175							=cut
176
177							######################################################################
178							# Constructor (a.k.a. the new() method)
179							######################################################################
180
181							=head1 METHODS
182
183							=head2 new ( [, ]... )
184
185							Create a new ControlBreak object.
186
187							Arguments are user-defined names for each level, in minor to major
188							order. The set of names must be unique, and they must each start
189							with a letter or underscore, followed by any number of letters,
190							numbers or underscores.
191
192							A level name can also begin with a '+', which denotes that a numeric
193							comparison will be used for the values processed at this level.
194
195							The number of arguments to new() determines the number of control levels
196							that will be monitored. The variables provided to method test() must
197							match in number and datatype to these operators.
198
199							The order of the arguments corresponds to a hierachical level of
200							control, from lowest to highest; i.e. the first argument corresponds
201							to level 1, the second to level 2, etc. This also corresponds
202							to sort order, from minor to major, when iterating through a data stream.
203
204							=cut
205
206							BUILD {
207							croak 'E at least one argument is required'
208							if @_ == 0;
209
210							foreach my $arg (@_) {
211							croak 'E invalid level name'
212							unless $arg =~ m{ \A [+]? [[:alpha:]_]\w+ }xms;
213							}
214
215							$_num_levels = @_;
216
217							my %lev_count;
218
219							foreach my $arg (@_) {
220							$lev_count{$arg}++;
221							croak 'E duplicate level name: ' . $arg
222							if $lev_count{$arg} > 1;
223							my $level_name = $arg;
224							my $is_numeric = $level_name =~ s{ \A [+] }{}xms;
225							push @level_names, $level_name;
226							my $op = $is_numeric ? '==' : 'eq';
227							$_comp_op{$level_name} = $op;
228							$_fcomp{$level_name} = _op_to_func($op);
229							}
230
231							@_last_values = ( undef ) x $_num_levels;
232
233							my $ii = 0;
234							map { $_levname{$ii++} = $_ } @level_names;
235
236							$ii = 0;
237							map { $_levidx{$_} = $ii++ } @level_names;
238							}
239
240							######################################################################
241							# Public methods
242							######################################################################
243
244							=head2 break ( [ ] )
245
246							The B method provides a convenient way to check whether the last
247							invocation of the test method resulted in a control break, or a
248							control break greater than or equal to the optionally
249							provided as an argument.
250
251							For example, if you have levels 'City', 'State' and 'Country', and
252							there's a control break on level 1 (City), then invoking B
253							will return 1 and therefore be treated as true within a condition.
254							If there was no control break, then 0 (false) is returned.
255
256							When invoked with a level name argument, B will map the level name
257							to a level number and compare it to the control break level determined
258							by the last invocation of test(). If the tested control break level
259							number is equal or higher than the argument level, then that level
260							number is returned and, since it will be non-zero, treated as a true
261							value within a condition. Otherwise, zero (false) is returned.
262
263							Ultimately the point of this is that you can use it to write a series
264							of actions, like printing subtotals and clearing subtotal variables,
265							such that a higher level control break will trigger actions
266							associated with lower level control breaks. For example:
267
268							my $cb = ControlBreak( qw/City State Country/ );
269
270							if ( $cb->break() ) {
271							say '=== control break detected at level: ' . $cb->levelname;
272							}
273							if ( $cb->break('City') ) {
274							say "City total: $city";
275							$city = 0;
276							}
277							if ( $cb->break('State') ) {
278							say "State total: $state";
279							$state = 0;
280							}
281							if ( $cb->break('Country') ) {
282							say "Country total: $country";
283							$country = 0;
284							}
285
286							In this example, when a Country control break is detected all three
287							subtotals will be printed. When a State control break is detected,
288							only State and City will print.
289
290							=cut
291
292	166			166	1	633	method break ( $level_name=undef ) {
	166					170
	166					185
	166					158
293	166	50				229	if ($level_name) {
294							croak 'E invalid level name: ' . $level_name
295	166	50				245	unless exists $_levidx{$level_name};
296	166					195	my $levnum = $_levidx{$level_name} + 1;
297	166					304	return $_test_levelnum >= $levnum;
298							}
299
300	0					0	return $_test_levelnum;
301							}
302
303							=head2 comparison ( level_name => [ 'eq' \| '==' \| sub ] ... )
304
305							The B method accepts a hash which sets the comparison
306							operations for the designated levels. Keywords must match the level
307							names provide in new(). Values can be '==' for numeric comparison,
308							'eq' for alpha comparison, or anonymous subroutines.
309
310							Anonymous subroutines must take two arguments, compare them in some
311							fashion, and return a boolean. The first argument to the comparison
312							routine will be the value passed to the test() method. The second
313							argument will be the corresponding value from the last iteration.
314
315							All levels are provided with default comparison functions as determined
316							by new(). This method is provided so you can change one or more of
317							those defaults. Any level name not referenced by keys in the
318							argument list will be left unchanged.
319
320							Some handy comparison functions are:
321
322							# case-insensitive match
323							sub { lc $_[0] eq lc $_[1] }
324
325							# strings coerced to numbers (so 07 and 7 are equal)
326							sub { ($_[0] + 0) == ($_[1] + 0) }
327
328							# blank values treated as matched
329							sub { $_[0] eq '' ? 1 : $_[0] eq $_[1] }
330
331							=cut
332
333	6			6	1	645	method comparison (%h) {
	6					10
	6					23
	6					10
334	6					42	while ( my ($level_name, $v) = each %h ) {
335							croak 'E invalid level name: ' . $level_name
336	7	100				36	unless exists $_levidx{$level_name};
337	5					9	$_comp_op{$level_name} = $v;
338	5					13	$_fcomp{$level_name} = _op_to_func($v);
339							}
340							}
341
342							=head2 continue ()
343
344							Saves the values most recently provided to the test() method so they
345							can be compared to new values on the next iteration.
346
347							On the next iteration these values will be accessible via the last()
348							method.
349
350							B is best invoked within the continue block of a loop, to
351							make sure it isn't missed.
352
353							B cannot be used in conjunction with B, which
354							internally calls B and B for you.
355
356							=cut
357
358	138			138	1	12632	method continue () {
	138					152
	138					136
359	138					281	@_last_values = @_test_values;
360	138					429	$_continue_count++;
361							}
362
363							=head2 last ($level)
364
365							Returns the value (of the corresponding level) that was given to the
366							B method called prior to the most recent one.
367
368							The argument can be a level name or a level number.
369
370							Normally this is used while iterating through a data stream. When a
371							level change (i.e. control break) is detected, the current data value
372							has changed relative to the preceding iteration. At this point it
373							may be necessary to take some action, such a printing a subtotal.
374							But, the subtotal will be for the preceding group of data and the
375							current value belongs to the next group. The B method allows
376							you to access the value for the group that was just processed so, for
377							example, the group name can be included on the subtotal line.
378
379							For example, if control levels were named 'X' and 'Y' and you are
380							iterating through data and invoking test($x, $y) at each iteration,
381							then invoking $cb->last('Y') on iteration 9 will returns the value of
382							$y on iteration 8.
383
384							Note that B should not be invoked before last() within the
385							scope of an iteration loop; i.e. continue() should be the last thing
386							done before the next turn of the loop.
387
388							=cut
389
390	88			88	1	1381	method last ($arg) {
	88					91
	88					98
	88					83
391	88					88	my $retval;
392
393	88	100				199	if ( $arg =~ m{ \A \d+ \Z }xms ) {
394							croak 'E invalid level number: ' . $arg
395	1	50				14	unless exists $_levname{$arg};
396	0					0	$retval = $_last_values[$arg];
397							} else {
398							croak 'E invalid level name: ' . $arg
399	87	100				159	unless exists $_levidx{$arg};
400	86					117	$retval = $_last_values[$_levidx{$arg}];
401							}
402
403	86					951	return $retval;
404							}
405
406							=head2 levelname
407
408							Return the level name for the most recent invocation of the B
409							method.
410
411							=cut
412
413	0			0	1	0	method levelname () {
	0					0
	0					0
414	0					0	return $_test_levelname;
415							}
416
417							=head2 levelnum
418
419							Return the level number for the most recent invocation of the B
420							method.
421
422							=cut
423
424	86			86	1	292	method levelnum () {
	86					87
	86					87
425	86					144	return $_test_levelnum;
426							}
427
428
429							=head2 level_numbers
430
431							Return a list of level numbers corresponding to the levels defined
432							in B. This can be useful, for example, when you want to
433							set up some lexical variables for use as indexes into a list you
434							might use to accumulate subtotals.
435
436							my $cb = ControlBreak->new( qw( L1 L2 EOD ) );
437							my @totals;
438							my ($L1, $L2, $EOD) = $cb->level_numbers;
439
440							foreach my $sublist (@list_of_lists) {
441							my ($control1, $control2, $number) = $sublist->@*;
442							...
443							my $sub_totals = sub {
444							if ($cb->break('L1')) {
445							# report the L1 subtotal here
446							$totals[$L1] = 0; # clear the subtotal
447							}
448							...
449							# accumulate subtotals
450							map { $totals[$_] += $number } $cb->level_numbers;
451							};
452
453							$cb->test_and_do(
454							$control1,
455							$control2,
456							$cb->iteration == $list_of_lists - 1,
457							$sub_totals
458							);
459							}
460
461
462							=cut
463
464	32			32	1	129	method level_numbers () {
	32					36
	32					30
465	32					70	return 1 .. $_num_levels;
466							}
467
468							=head2 reset
469
470							Resets the state of the object so it can be used again for another
471							set of iterations using the same number and type of controls
472							establish when the object was instantiated with new(). Any
473							comparisons that were subsequently modified are retained.
474
475							=cut
476
477	1			1	1	6	method reset () {
	1					1
	1					2
478	1					1	$iteration = 0;
479	1					2	$_continue_count = 0;
480	1					2	$_test_levelnum = 0;
481	1					1	$_test_levelname = 0;
482	1					2	@_test_values = ();
483	1					3	@_last_values = ( undef ) x $_num_levels;
484							}
485
486							=head2 test ( $var1 [, $var2 ]... )
487
488							Submits the control variables for testing against the values from the
489							previous iteration.
490
491							Testing is done in reverse order, from highest to lowest (major to
492							minor) and stops once a change is detected. Where it stops determines
493							the control break level. For example, if $var2 changed, method
494							levelnum will return 2. If $var2 did not change, but $var1 did, then
495							method B will return 1. If nothing changes, then
496							B will return 0.
497
498							Note that the level numbers set by B are true if there was
499							a level change, and false if there wasn't. So, they can be used as a
500							simple boolean test of whether there was a change. Or you can use
501							the B method to determine whether any control break has occured.
502
503							Because level numbers correspond to the hierachy of data order, they
504							can be use to trigger multiple actions; e.g. B >= 1 could be
505							used to print subtotals for levels 1 whenever a control break occured
506							for level 1, 2 or 3. It is usually the case that higher control
507							breaks are meant to cascade to lower control levels and this can be
508							achieved in this fashion. The B method simplifies this.
509
510							Note that method B must be called at the end of each
511							iteration in order to save the values of the iteration for the next
512							iteration. If not, the next B invocation will croak.
513
514							=cut
515
516	142			142	1	3474	method test (@args) {
	142					150
	142					205
	142					153
517	142	100				285	croak 'E number of arguments to test() must match those given in new()'
518							if @args != $_num_levels;
519
520	140	100				246	croak 'E continue() must be called after test()'
521							unless $iteration == $_continue_count;
522
523	139					223	@_test_values = @args;
524
525	139					149	$iteration++;
526
527	139					140	my $is_break;
528	139					144	my $lev_idx = 0;
529
530							# process tests in reverse order of arguments; i.e. major to minor
531	139					154	my $jj = @args;
532	139					202	foreach my $arg (reverse @args) {
533	244					269	$jj--;
534
535							# on the first iteration, make the last values match the current
536							# ones so we don't detect any control break
537	244	100	66			410	$_last_values[$jj] //= $arg
538							if $iteration == 1;
539
540	244					308	my $level_name = $_levname{$jj};
541
542							# compare the current and last values using the comparison function
543							# if they don't match, then it's a control break
544	244					383	$is_break = not $_fcomp{$level_name}->( $arg, $_last_values[$jj] );
545
546	244	100				502	if ( $is_break ) {
547							# internally our lists use the usual zero-based indexing
548							# but externally our level numbers are 1-based, where
549							# 1 is the most minor control variable. Level 0 is used
550							# to denote no level; i.e. no control break. Since zero
551							# is treated as false by perl, and non-zero as true, we
552							# can use the level number in a condition to determine if
553							# there's been a control break; ie. $level ? 'break' : 'no break'
554	71					84	$lev_idx = $jj + 1;
555	71					92	last;
556							}
557							}
558	139					162	my $lev_num = $lev_idx;
559
560	139					167	$_test_levelnum = $lev_num;
561	139					199	$_test_levelname = $_levname{$jj};
562
563	139					253	return;
564							}
565
566							=head2 test_and_do ( $var1 [, $var2 ]... $var_end, $coderef )
567
568							The B method is similar to B. It takes the same
569							arguments as B, plus one additional argument that is an
570							anonymous code reference. Internally, it calls B and then, if
571							there is a control break, calls the anonymous subroutine provided in
572							the last argument. Typically, that code will perform work related to
573							subtotals or other actions necessary when a control break occurs.
574
575							But B does one other thing. It expects the last control
576							variable ($var_end) to be an end of data indicator, such as the perl
577							builtin operator B. This indicator should return false on each
578							iteration over the data until the very last iteration -- when it
579							should change to true, thereby triggering a major control break.
580
581							What test_and_do does then is to add an extra loop. This simulates
582							a final record and will trigger B to signal control breaks
583							at all levels. Thus, the code provided will be executed between
584							every change of data AND after all data has been iterated over.
585
586							This avoids the necessity of repeating the control break actions
587							you've put inside the data loop immediately after the loop's closing
588							bracket. When you just use B and B, an end-of-data
589							control break won't occur and the simplist workaround is to just
590							duplicate your control break code after the loops closing bracket.
591
592							Here's a typical use case involving end of file processing. Note the
593							extra control level, named 'EOF', and the use of the B builtin
594							function as the second last argument of B:
595
596							my $cb = ControlBreak->new( qw( L1 L2 EOF ) );
597
598							my $lev1_subtotal = 0;
599							my $lev2_subtotal = 0;
600							my $grand_total = 0;
601
602							while (my $line = <>) {
603							chomp $line;
604
605							my ($lev1, $lev2, $data) = split "\t", $line;
606
607							my $subtotal_coderef = sub {
608							if ($cb->break('L1')) {
609							say $cb->last('L1'), $cb->last('L2'), $lev1_subtotal . '*';
610							$lev1_subtotal = 0;
611							}
612							...
613							if ($cb->break('EOF')) {
614							say 'Grand total,,', $grand_total, '***';
615							}
616
617							$lev1_subtotal += $data;
618							$lev2_subtotal += $data;
619							$gran_total += $data;
620							}
621
622							$cb->test_and_do($lev1, $lev2, eof, $subtotal_coderef);
623							}
624
625							Also note that if your subroutine needs to reference variables
626							defined outside the scope of the loop (as in this case with the
627							totalling variables) then it needs to be defined within the loop so
628							it can be a closure over the variables in the enclosing scope.
629
630							Another typical use case involves iterating over a list of values.
631							Here, we have no built in function to tell us when we've reached the
632							final value, but if we have a fixed list of values we can use the
633							length of the list and test it against the value returned by the
634							ControlBreak iterator method. For example:
635
636							my $cb = ControlBreak->new( qw( L1 L2 EOD ) );
637
638							my $lev1_subtotal = 0;
639							my $lev2_subtotal = 0;
640							my $grand_total = 0;
641
642							my $last_iter = @data - 1;
643
644							foreach my $line (@data {
645							chomp $line;
646							my ($lev1, $lev2, $data) = split "\t", $line;
647
648							my $subtotal_coderef = sub {
649							if ($cb->break('L1')) {
650							say $cb->last('L1'), $cb->last('L2'), $lev1_subtotal . '*';
651							$lev1_subtotal = 0;
652							}
653							...
654							if ($cb->break('EOD')) {
655							say 'Grand total,,', $grand_total, '***';
656							}
657
658							$lev1_subtotal += $data;
659							$lev2_subtotal += $data;
660							$gran_total += $data;
661							}
662
663							$cb->test_and_do($lev1, $lev2, $cb->iteration == $last_iter, $subtotal_coderef);
664							}
665
666							=cut
667
668	28			28	1	166	method test_and_do (@args) {
	28					31
	28					56
	28					29
669
670	28	50				49	croak 'E test_and_do must have one more argument than new()'
671							unless @args == $_num_levels + 1;
672
673	28					42	my $coderef = pop @args;
674	28					35	my $eod = 0 + $args[-1];
675
676	28	50				48	croak 'E last argument of test_and_do must be a code reference'
677							unless ref $coderef eq 'CODE';
678
679	28					45	for my $ii (0..$eod) {
680	30					35	$args[-1] = $ii;
681	30					54	$self->test(@args);
682	30					57	$coderef->();
683	30					169	$self->continue;
684							}
685
686							}
687
688							######################################################################
689							# Private subroutines and functions
690							######################################################################
691	35			35		41	sub _op_to_func ($op) {
	35					47
	35					38
692
693	35					40	my $fcompare;
694
695	35	100				92	if ($op eq '==') {
		100
		50
696	4			32		13	$fcompare = sub { $_[0] == $_[1] };
	32					66
697							}
698							elsif ($op eq 'eq') {
699	27			177		83	$fcompare = sub { $_[0] eq $_[1] };
	177					291
700							}
701							elsif (ref $op eq 'CODE') {
702	4					6	$fcompare = $op;
703							}
704							else {
705	0					0	croak 'F invalid comparison operator: ' . $op;
706							}
707
708	35					102	return $fcompare;
709							}
710
711							1;
712
713							__END__