File Coverage

blib/lib/Set/Integer/Gapfillers.pm

Criterion	Covered	Total	%
statement	147	148	99.3
branch	67	70	95.7
condition	12	15	80.0
subroutine	10	10	100.0
pod	4	4	100.0
total	240	247	97.1

line	stmt	bran	cond	sub	pod	time	code
1							package Set::Integer::Gapfillers;
2							$VERSION = '0.08';
3	8			8		285826	use strict;
	8					21
	8					513
4	8			8		49	use Carp;
	8					17
	8					20746
5
6							sub new {
7	45			45	1	34066	my $class = shift;
8	45					166	my $initref = _args_check(@_);
9	33					149	return bless $initref, $class;
10							}
11
12							sub all_segments {
13	35			35	1	20845	my $self = shift;
14	35					101	my %params = _check_extra_args(@_);
15	34					715	_expand_upon_request($self->{segments}, \%params);
16							}
17
18							sub gapfillers {
19	29			29	1	67714	my $self = shift;
20	29					591	my %params = _check_extra_args(@_);
21	28					640	my @segments = @{$self->{segments}};
	28					90
22	28					628	my @gaps;
23	28					92	for (my $n = 0; $n <= $#segments; $n++) {
24	133	100				442	push @gaps, $segments[$n] unless $self->{statuses}->[$n];
25							}
26	28					706	_expand_upon_request( [ @gaps ], \%params);
27							}
28
29							sub segments_needed {
30	7			7	1	12775	my $self = shift;
31	7					21	my %params = _check_extra_args(@_);
32	6					10	my @segments_needed = @{ $self->{segments} };
	6					19
33							# If the lower bound to the requested range fell in a provided segment,
34							# then the first segment returned may have "unneeded" elements on its
35							# lower side.
36							# Analogously, if the upper bound fell in a provided segment,
37							# then the last segment returned may have "unneeded" elements on its
38							# upper side.
39							# We need to snip these unneeded elements off.
40	6	100				23	if ($self->{statuses}->[0]) {
41	4	50				17	if ($segments_needed[0]->[0] < $self->{lower}) {
42	4					10	$segments_needed[0]->[0] = $self->{lower};
43							}
44							}
45	6	100				17	if ($self->{statuses}->[-1]) {
46	4	50				16	if ($segments_needed[-1]->[1] > $self->{upper}) {
47	4					11	$segments_needed[-1]->[1] = $self->{upper};
48							}
49							}
50	6					20	_expand_upon_request([ @segments_needed ], \%params);
51							}
52
53							sub _args_check {
54	45			45		180	my %args = @_;
55	45	100				427	croak "Need lower bound: $!" unless defined $args{lower};
56	44	100				354	croak "Need upper bound: $!" unless defined $args{upper};
57	43	100				449	croak "Lower bound must be numeric: $!" unless $args{lower} =~ /^-?\d+$/;
58	42	100				366	croak "Upper bound must be numeric: $!" unless $args{upper} =~ /^-?\d+$/;
59	41	100				264	croak "Upper bound must be >= lower bound: $!"
60							unless $args{upper} >= $args{lower};
61	40	100				242	croak "Need 'sets' argument: $!" unless defined $args{sets};
62	39	100				279	croak "'sets' must be array reference: $!"
63							unless ref($args{sets}) eq 'ARRAY';
64	38					52	foreach my $pairref (@{$args{sets}}) {
	38					91
65	124					479	croak "Elements of 'sets' must be 2-element array references: $!"
66	124	100				317	unless scalar(@{$pairref}) == 2;
67	123					142	foreach my $n (@{$pairref}) {
	123					195
68	245	100				1164	croak "Elements of sets must be numeric: $!"
69							unless $n =~ /^-?\d+$/;
70							}
71	122	100				712	croak "First element of each array must be <= second element\n$pairref->[0] has problem: $!"
72							unless $pairref->[0] <= $pairref->[1];
73							}
74	34					61	my @sets = sort { $a->[0] <=> $b->[0] } @{$args{sets}};
	119					285
	34					172
75	34					52	my (@firsts, @seconds);
76	34					122	for (my $i=0; $i<=$#sets; $i++) {
77	119	100				220	if ($i > 0) {
78	85	100				361	croak "First element of each array must be > second element of previous array: $!"
79							unless ($sets[$i]->[0] > $sets[$i-1]->[1]);
80							} else {
81	34					48	1;
82							}
83	118					195	$firsts[$i] = $sets[$i]->[0];
84	118					341	$seconds[$i] = $sets[$i]->[1];
85							}
86	33					54	my %intermediate;
87	33					81	$intermediate{sets} = \@sets;
88	33					67	$intermediate{firsts} = \@firsts;
89	33					67	$intermediate{seconds} = \@seconds;
90	33					74	$intermediate{lower} = $args{lower};
91	33					60	$intermediate{upper} = $args{upper};
92	33					149	my ($segmentsref, $statusref) = _calculate(%intermediate);
93	33					162	my %init = (
94							segments => $segmentsref,
95							statuses => $statusref,
96							lower => $intermediate{lower},
97							upper => $intermediate{upper},
98							);
99	33					158	return \%init;
100							}
101
102							sub _calculate {
103	33			33		115	my %args = @_;
104	33					70	my @all_segments;
105							my @statuses;
106	0					0	my %status;
107							# Inspect for either of two oddball but easy-to-compute cases.
108							# Cases of $args{lower} in gap after last provided segment
109							# and $args{upper} in gap before first provided segment
110	33	100	100			254	if (
111							$args{firsts}->[0] > $args{upper}
112							or
113							$args{seconds}->[-1] < $args{lower}
114							) {
115	2					7	push @all_segments, [ $args{lower}, $args{upper} ];
116	2					3	push @statuses, 0;
117	2					9	return (\@all_segments, \@statuses);
118							}
119	31					44	my $i = 0;
120	31					42	my $j = scalar(@{$args{seconds}}) - 1;
	31					67
121	31					197	$i++ until ( $args{lower} < $args{firsts}->[$i] );
122	31					131	$j-- while ( $args{upper} < $args{firsts}->[$j] );
123							# $status{xxx} true: starting (ending) in provided segment
124							# $status{xxx} false: starting (ending) in subsequent gap
125	31	100				109	$status{lower} = ($args{lower} <= $args{seconds}->[$i-1]) ? 1 : 0;
126	31	100				105	$status{upper} = ($args{upper} <= $args{seconds}->[$j] ) ? 1 : 0;
127
128							# Case of $args{lower} in gap below the first provided segment.
129							# I have to handle this separately because its $i value would be -1 ...
130							# which would created problems if used as an array subscript
131	31	100				93	if ($args{lower} < $args{firsts}->[0]) {
132	8					30	push @all_segments, [ $args{lower}, $args{firsts}->[0] - 1 ];
133	8					16	push @statuses, 0;
134	8	100				24	if ($j == 0) {
135	2					6	push @all_segments, $args{sets}[0];
136	2					2	push @statuses, 1;
137	2	100				8	if (! $status{upper}) {
138	1					3	push @all_segments, [ $args{seconds}->[0] + 1, $args{upper} ];
139	1					2	push @statuses, 0;
140							}
141							} else {
142	6					22	for my $p (0..$j-1) {
143	12					44	push @all_segments, $args{sets}[$p];
144	12					17	push @statuses, 1;
145	12	100				53	unless ($args{seconds}->[$p] + 1 == $args{firsts}->[$p+1]) {
146	10					32	push @all_segments,
147							[ $args{seconds}->[$p] + 1, $args{firsts}->[$p+1] - 1 ];
148	10					27	push @statuses, 0;
149							}
150							}
151	6					18	push @all_segments, $args{sets}[$j];
152	6					14	push @statuses, 1;
153	6	100				23	if (! $status{upper}) {
154	2					8	push @all_segments,
155							[ $args{seconds}->[$j] + 1, $args{upper} ];
156	2					5	push @statuses, 0;
157							}
158							}
159	8					41	return (\@all_segments, \@statuses);
160							}
161
162							# Cases where $args{lower} and $args{upper} occur within same interior
163							# provided segment/following gap pair
164							# 3 sub-cases:
165							# both in segment
166							# lower in segment; upper in gap
167							# both in gap
168							# I want to handle these here so that subsequently I can proceed on
169							# assumption that lower and upper are in different pairs
170	23					43	my $h = $i - 1;
171	23	100				69	if ($h == $j) {
172	5	100	100			63	if ($status{lower} and $status{upper}) {
		100	66
		50	33
173	1					3	push @all_segments, $args{sets}[$h];
174	1					25	push @statuses, 1;
175							} elsif ($status{lower} and ! $status{upper}) {
176	1					3	push @all_segments, $args{sets}[$h];
177	1					2	push @statuses, 1;
178	1					4	push @all_segments,
179							[ $args{seconds}->[$h] + 1, $args{upper} ];
180	1					2	push @statuses, 0;
181							} elsif (! $status{lower} and ! $status{upper}) {
182	3					9	push @all_segments, [ $args{lower}, $args{upper} ];
183	3					5	push @statuses, 0;
184							}
185	5					22	return (\@all_segments, \@statuses);
186							}
187							# So now I'm ready to handle the remaining -- and most likely to occur --
188							# cases: Starting in one segment-gap pair (other than the first) and
189							# ending in a different segment-gap pair.
190							# First handle the location of the lower bound:
191	18	100				50	if ($status{lower}) {
192	12					39	push @all_segments, $args{sets}[$h];
193	12					57	push @statuses, 1;
194	12					49	push @all_segments,
195							[ $args{seconds}->[$h] + 1, $args{firsts}->[$i] - 1 ];
196	12					25	push @statuses, 0;
197							} else {
198	6					24	push @all_segments,
199							[ $args{lower}, $args{firsts}->[$i] - 1 ];
200	6					51	push @statuses, 0;
201							}
202							# Next handle all other segment-gap pairs except the last:
203	18					52	for my $p ($i..$j-1) {
204	26					50	push @all_segments, $args{sets}[$p];
205	26					36	push @statuses, 1;
206	26	100				111	unless ($args{seconds}->[$p] + 1 == $args{firsts}->[$p+1]) {
207	19					65	push @all_segments,
208							[ $args{seconds}->[$p] + 1, $args{firsts}->[$p+1] - 1 ];
209	19					48	push @statuses, 0;
210							}
211							}
212							# Finally, handle the final segment and possible gap:
213	18					46	push @all_segments, $args{sets}[$j];
214	18					36	push @statuses, 1;
215	18	100				46	if (! $status{upper}) {
216	12					39	push @all_segments,
217							[ $args{seconds}->[$j] + 1, $args{upper} ];
218	12					25	push @statuses, 0;
219							}
220	18					81	return (\@all_segments, \@statuses);
221							}
222
223							sub _expand_upon_request {
224	68			68		100	my $compressed_ref = shift;
225	68					83	my $paramsref = shift;
226	68	100	100			872	if (defined $paramsref->{expand} and $paramsref->{expand}) {
227	9					10	my @expanded;
228	9					11	foreach my $pairref (@{$compressed_ref}) {
	9					18
229	47					182	push @expanded, [ $pairref->[0] .. $pairref->[1] ];
230							}
231	9					48	return [ @expanded ];
232							} else {
233	59					206	return $compressed_ref;
234							}
235							}
236
237							sub _check_extra_args {
238	71			71		132	my @args = @_;
239	71					91	my %params;
240	71	100				432	if ( scalar(@args) ) {
241	13	100				34	unless ( scalar(@args) % 2 ) {
242	10					29	%params = @args;
243							} else {
244	3					416	croak "Need even number of arguments: $!";
245							}
246							}
247	68					210	return %params;
248							}
249
250							1;
251
252							#################### DOCUMENTATION ####################
253
254							=head1 NAME
255
256							Set::Integer::Gapfillers - Fill in the gaps between integer ranges
257
258							=head1 SYNOPSIS
259
260							use Set::Integer::Gapfillers;
261							$gf = Set::Integer::Gapfillers->new(
262							lower => -12,
263							upper => 62,
264							sets => [
265							[ 1, 17 ], # Note: Use comma, not
266							[ 25, 42 ], # range operator (..)
267							[ 44, 50 ],
268							],
269							);
270
271							$segments_needed_ref = $gf->segments_needed();
272
273							$gapfillers_ref = $gf->gapfillers();
274
275							$all_segments_ref = $gf->all_segments();
276
277							Any of the three preceding output methods can also be called with an C
278							option:
279
280							$segments_needed_ref = $gf->segments_needed( expand => 1 );
281
282							=head1 DESCRIPTION
283
284							This Perl extension provides methods which may be useful in manipulating sets
285							whose elements are consecutive integers. Suppose that you are provided with
286							the following non-intersecting, non-overlapping sets of consecutive integers:
287
288							{ 1 .. 17 }
289							{ 25 .. 42 }
290							{ 44 .. 50 }
291
292							Suppose further that you are provided with the following lower and upper
293							bounds to a range of consecutive integers:
294
295							lower: 12
296							upper: 62
297
298							Provide a set of sets which:
299
300							=over 4
301
302							=item *
303
304							when joined together, would form a set of consecutive integers from the
305							lower to the upper bound, inclusive; and
306
307							=item *
308
309							are derived from:
310
311							=over 4
312
313							=item *
314
315							the sets provided;
316
317							=item *
318
319							proper subsets thereof; or
320
321							=item *
322
323							newly generated sets which fill in the gaps below, in between or above the
324							provided sets.
325
326							=back
327
328							=back
329
330							Once a Set::Integer::Gapfillers object has been constructed, its C
331							method can be used to provide these results:
332
333							{ 12 .. 17 } # subset of 1st set provided
334							{ 18 .. 24 } # gap-filler set
335							{ 25 .. 42 } # 2nd set provided
336							{ 43 .. 43 } # gap-filler set
337							# (which happens to consist of a single element)
338							{ 44 .. 50 } # 3rd set provided
339							{ 51 .. 62 } # gap-filler set for range above highest provided set
340
341							Alternatively, you may only wish to examine the gap-filler sets. The
342							C method provides this set of sets.
343
344							{ 18 .. 24 } # gap-filler set
345							{ 43 .. 43 } # gap-filler set
346							{ 51 .. 62 } # gap-filler set
347
348							And, as an additional alternative, you may wish to have your set of sets begin
349							or end with I the values of a given provided set, rather than a proper
350							subset thereof containing only those values needed to populate the desired
351							range. In that case, use the C method.
352
353							{ 1 .. 17 } # 1st set provided
354							{ 18 .. 24 } # gap-filler set
355							{ 25 .. 42 } # 2nd set provided
356							{ 43 .. 43 } # gap-filler set
357							# (which happens to consist of a single element)
358							{ 44 .. 50 } # 3rd set provided
359							{ 51 .. 62 } # gap-filler set for range above highest provided set
360
361							The results returned by the C method differ from those
362							returned by the C method only at the lower or upper ends.
363							If, as in the above example, the lower bound of the target range of integers
364							falls inside a provided segment, the first set returned by
365							C will be the I first set provided; the first set
366							returned by C will be a I of the first set
367							provided, starting with the requested lower bound.
368
369							=head1 USAGE
370
371							=head2 Publicly Callable Methods
372
373							=head3 C
374
375							$gf = Set::Integer::Gapfillers->new(
376							lower => -12,
377							upper => 62,
378							sets => [
379							[ 1, 17 ], # Note: Use comma, not
380							[ 25, 42 ], # range operator (..)
381							[ 44, 50 ],
382							],
383							);
384
385							B Constructor of a Set::Integer::Gapfillers object.
386
387							B List of key-value pairs. C and C take integers
388							denoting the lower and upper bounds of the range of integers desired as the
389							result. C takes a reference to an anonymous array whose elements are,
390							in turn, references to anonymous arrays whose B elements are the lowest
391							and highest numbers in a range of consecutive integers.
392
393							I The sets of consecutive integers supplied must be non-overlapping.
394							Set::Integer::Gapfillers will C if supplied with arguments such as these:
395
396							$gf = Set::Integer::Gapfillers->new(
397							lower => -12, upper => 62,
398							sets => [
399							[ 1, 30 ], # no good: overlaps with next set
400							[ 25, 48 ], # no good: overlaps with previous and next sets
401							[ 44, 50 ], # no good: overlaps with previous set
402							],
403							);
404
405							I Only two elements should be supplied in the anonymous arrays
406							supplied as elements to the array reference which is the value of C:
407							the lowest and highest (or, first and last) elements in each array. You
408							should B use Perl's range operator (I C<[ 25 .. 48 ]>) in this
409							instance.
410
411							B A Set::Integer::Gapfillers object.
412
413							=head3 C
414
415							$segments_needed_ref = $gf->segments_needed();
416
417							B Generate a set of sets which (a) when joined together, would
418							form a set of consecutive integers from the lower to the upper bound,
419							inclusive; and (b) are derived from (i) the sets provided; (ii) proper
420							subsets thereof; or (iii) newly generated sets which fill in the gaps below,
421							in between or above the provided sets.
422
423							B None required. C 1> is optional (see FAQ).
424
425							B A reference to an anonymous array whose elements are, in turn,
426							anonymous arrays of two elements: the lowest and highest integers in a
427							particular subset. But when the C option is set, the return value is
428							a reference to an anonymous array whose elements are, in turn, references to
429							arrays each of which holds I of each set needed -- not just
430							the beginning and end points.
431
432
433							=head3 C
434
435							$gapfillers_ref = $gf->gapfillers();
436
437							B Generate a set of the newly generated sets needed to fill in the
438							gaps below, in between or above the sets provided to the constructor. The
439							sets, like those returned by C, are denoted by their lower
440							and upper bounds rather than by their entire contents.
441
442							B None required. C 1> is optional (see FAQ).
443
444							B A reference to an anonymous array whose elements are, in turn,
445							anonymous arrays holding two elements: the lower and upper bounds of the
446							integer ranges needed to provide gap-filling as described in 'Purpose'. When
447							the C option is set, the contents of those inner sets are expanded to
448							include the full range of integers needed, not just the beginning and end
449							points.
450
451							=head3 C
452
453							$all_segments_ref = $gf->all_segments();
454
455							B Generate a set of all sets needed in order to populate a set of
456							consecutive integers from the lower to the upper bound, inclusive. The sets
457							generated are derived from (a) the sets provided or (b) newly generated sets
458							which fill in the gaps below, in between or above the provided sets.
459
460							B None required. C 1> is optional (see FAQ).
461
462							B A reference to an anonymous array whose elements are, in turn,
463							anonymous arrays holding the sets described in 'Purpose'. When
464							the C option is set, the contents of those inner sets are expanded to
465							include the full range of integers needed, not just the beginning and end
466							points.
467
468							=head1 FAQ
469
470							=over 4
471
472							=item 1. How do the sets returned by the three non-constructor methods differ from one another?
473
474							With C, the objective is: I
475							I need to fill the desired range will come from sets already provided or from
476							newly created gap-filling sets.>
477
478							With C, the objective is: I
479							will need to create to fill the gaps between the sets already provided.>
480
481							With C, the objective is: I
482							-- those already provided and those I will have to create -- from which I will
483							pull integers to populate the desired range.>
484
485							Here are two examples:
486
487							=over 4
488
489							=item *
490
491							$gf = Set::Integer::Gapfillers->new(
492							lower => 10,
493							upper => 22,
494							sets => [
495							[ 9, 11 ],
496							[ 15, 18 ],
497							[ 20, 24 ],
498							],
499							);
500
501							The three non-constructor methods return sets as follows:
502
503							9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
504							segments_needed: A A B B B C C C C D E E E
505							gapfillers: B B B D
506							all_segments: A A A B B B C C C C D E E E E E
507
508							... where C, C and C are elements coming from provided sets and C
509							and C are coming from newly-created gap-filling sets.
510
511							=item *
512
513							$gf = Set::Integer::Gapfillers->new(
514							lower => 10,
515							upper => 22,
516							sets => [
517							[ 9, 11 ],
518							[ 15, 18 ],
519							[ 20, 20 ],
520							],
521							);
522
523							The three non-constructor methods return sets as follows:
524
525							9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
526							segments_needed: A A B B B C C C C D E F F
527							gapfillers: B B B D F F
528							all_segments: A A A B B B C C C C D E F F
529
530							... where C, C and C are elements coming from provided sets and C,
531							C and C are coming from newly-created gap-filling sets.
532
533							=back
534
535							=item 2. Why do the output methods, by default, return references to two-element arrays rather than the full range of integers needed?
536
537							Memory and speed.
538
539							In an earlier implementation, F calculated its return values
540							by supplying the constructor's C argument with a list of references to
541							arrays of consecutive integers -- C<[ 12 .. 22 ]> -- rather than a list of
542							references to two-element arrays of the lower and upper bounds of the integer
543							ranges desired -- C<[ 12, 22 ]>. All internal calculations were made by
544							comparing the lower and upper bounds supplied with the arrays supplied. This
545							proved to be a memory hog and slow.
546
547							F was then revised to
548							require the user to supply only the beginning and end points of the provided
549							segments. Although this complicated the logic of the internal calculations
550							for the module author, it led to a vastly reduced memory footprint and vast
551							speedup in producing results. It was therefore decided to make the output
552							methods return values in the same manner, I beginning and end points of
553							ranges, rather than the entire ranges.
554
555							However, what an end-user of F might really be after is those
556							entire ranges. Hence, the C 1> option is provided so that the
557							results look like this:
558
559							$gf = Set::Integer::Gapfillers->new(
560							lower => -12,
561							upper => 62,
562							sets => [
563							[ 1, 17 ],
564							[ 25, 42 ],
565							[ 44, 50 ],
566							],
567							);
568
569							$segments_needed_ref = $gf->( expand => 1);
570							__END__
571							$segments_needed_ref: [
572							[-12 .. 0 ], # without 'expand': [ -12, 0 ]
573							[ 1 .. 17 ],
574							[ 18 .. 24 ],
575							[ 25 .. 42 ],
576							[ 43 .. 43 ],
577							[ 44 .. 50 ],
578							[ 51 .. 62 ],
579							]
580
581							=back
582
583							=head1 BUGS
584
585							None reported so far.
586
587							=head1 SUPPORT
588
589							Via e-mail to author at address below.
590
591							=head1 AUTHOR
592
593							James E Keenan
594							CPAN ID: JKEENAN
595							jkeenan@cpan.org
596							http://search.cpan.org/~jkeenan/
597
598							=head1 ACKNOWLEDGEMENTS
599
600							This Perl extension has its origin in a question I posed on Perlmonks
601							(L). BrowserUK's response
602							(L) was ingenious and terse and led me
603							to think that the solution could be modularized. However, when I realized
604							that my original question had not fully specified my objective, I found I
605							could no longer use BrowserUK's algorithm and had to work my own out -- so any
606							bugs are my fault, not his!
607
608							=head1 COPYRIGHT
609
610							Copyright 2006. James E. Keenan. United States.
611
612							This program is free software; you can redistribute
613							it and/or modify it under the same terms as Perl itself.
614
615							The full text of the license can be found in the
616							LICENSE file included with this module.
617
618							=head1 SEE ALSO
619
620							perl(1). During the Perlmonks thread mentioned in ACKNOWLEDGMENTS, reference
621							was made to CPAN module Set::Infinite (L), and specifically to
622							C as possibly providing another solution to the
623							gap-filling problem. Set::Array and Set::Scalar should also be consulted if
624							you need a wider arrange of methods to perform set operations.
625
626							=cut