File Coverage

blib/lib/List/UtilsBy.pm
Criterion Covered Total %
statement 122 124 98.3
branch 29 30 96.6
condition 8 8 100.0
subroutine 18 18 100.0
pod 15 15 100.0
total 192 195 98.4


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2009-2015 -- leonerd@leonerd.org.uk
5              
6             package List::UtilsBy;
7              
8 13     13   347326 use strict;
  13         26  
  13         502  
9 13     13   78 use warnings;
  13         20  
  13         746  
10              
11             our $VERSION = '0.10';
12              
13 13     13   81 use Exporter 'import';
  13         21  
  13         25794  
14              
15             our @EXPORT_OK = qw(
16             sort_by
17             nsort_by
18             rev_sort_by
19             rev_nsort_by
20              
21             max_by nmax_by
22             min_by nmin_by
23              
24             uniq_by
25              
26             partition_by
27             count_by
28              
29             zip_by
30             unzip_by
31              
32             extract_by
33             extract_first_by
34              
35             weighted_shuffle_by
36              
37             bundle_by
38             );
39              
40             =head1 NAME
41              
42             C - higher-order list utility functions
43              
44             =head1 SYNOPSIS
45              
46             use List::UtilsBy qw( nsort_by min_by );
47              
48             use File::stat qw( stat );
49             my @files_by_age = nsort_by { stat($_)->mtime } @files;
50              
51             my $shortest_name = min_by { length } @names;
52              
53             =head1 DESCRIPTION
54              
55             This module provides a number of list utility functions, all of which take an
56             initial code block to control their behaviour. They are variations on similar
57             core perl or C functions of similar names, but which use the block
58             to control their behaviour. For example, the core Perl function C takes
59             a list of values and returns them, sorted into order by their string value.
60             The C function sorts them according to the string value returned by
61             the extra function, when given each value.
62              
63             my @names_sorted = sort @names;
64              
65             my @people_sorted = sort_by { $_->name } @people;
66              
67             =cut
68              
69             =head1 FUNCTIONS
70              
71             All functions added since version 0.04 unless otherwise stated, as the
72             original names for earlier versions were renamed.
73              
74             =cut
75              
76             =head2 sort_by
77              
78             @vals = sort_by { KEYFUNC } @vals
79              
80             Returns the list of values sorted according to the string values returned by
81             the C block or function. A typical use of this may be to sort objects
82             according to the string value of some accessor, such as
83              
84             sort_by { $_->name } @people
85              
86             The key function is called in scalar context, being passed each value in turn
87             as both C<$_> and the only argument in the parameters, C<@_>. The values are
88             then sorted according to string comparisons on the values returned.
89              
90             This is equivalent to
91              
92             sort { $a->name cmp $b->name } @people
93              
94             except that it guarantees the C accessor will be executed only once per
95             value.
96              
97             One interesting use-case is to sort strings which may have numbers embedded in
98             them "naturally", rather than lexically.
99              
100             sort_by { s/(\d+)/sprintf "%09d", $1/eg; $_ } @strings
101              
102             This sorts strings by generating sort keys which zero-pad the embedded numbers
103             to some level (9 digits in this case), helping to ensure the lexical sort puts
104             them in the correct order.
105              
106             =cut
107              
108             sub sort_by(&@)
109             {
110 7     7 1 17 my $keygen = shift;
111              
112 7         12 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  10         19  
  10         15  
113 7         49 return @_[ sort { $keys[$a] cmp $keys[$b] } 0 .. $#_ ];
  4         23  
114             }
115              
116             =head2 nsort_by
117              
118             @vals = nsort_by { KEYFUNC } @vals
119              
120             Similar to C but compares its key values numerically.
121              
122             =cut
123              
124             sub nsort_by(&@)
125             {
126 8     8 1 24 my $keygen = shift;
127              
128 8         14 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  14         33  
  14         23  
129 8         67 return @_[ sort { $keys[$a] <=> $keys[$b] } 0 .. $#_ ];
  9         42  
130             }
131              
132             =head2 rev_sort_by
133              
134             =head2 rev_nsort_by
135              
136             @vals = rev_sort_by { KEYFUNC } @vals
137              
138             @vals = rev_nsort_by { KEYFUNC } @vals
139              
140             I
141              
142             Similar to C and C but returns the list in the reverse
143             order. Equivalent to
144              
145             @vals = reverse sort_by { KEYFUNC } @vals
146              
147             except that these functions are slightly more efficient because they avoid
148             the final C operation.
149              
150             =cut
151              
152             sub rev_sort_by(&@)
153             {
154 1     1 1 2 my $keygen = shift;
155              
156 1         2 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  2         5  
  2         15  
157 1         7 return @_[ sort { $keys[$b] cmp $keys[$a] } 0 .. $#_ ];
  1         7  
158             }
159              
160             sub rev_nsort_by(&@)
161             {
162 1     1 1 2 my $keygen = shift;
163              
164 1         3 my @keys = map { local $_ = $_; scalar $keygen->( $_ ) } @_;
  3         5  
  3         5  
165 1         6 return @_[ sort { $keys[$b] <=> $keys[$a] } 0 .. $#_ ];
  3         8  
166             }
167              
168             =head2 max_by
169              
170             $optimal = max_by { KEYFUNC } @vals
171              
172             @optimal = max_by { KEYFUNC } @vals
173              
174             Returns the (first) value from C<@vals> that gives the numerically largest
175             result from the key function.
176              
177             my $tallest = max_by { $_->height } @people
178              
179             use File::stat qw( stat );
180             my $newest = max_by { stat($_)->mtime } @files;
181              
182             In scalar context, the first maximal value is returned. In list context, a
183             list of all the maximal values is returned. This may be used to obtain
184             positions other than the first, if order is significant.
185              
186             If called on an empty list, an empty list is returned.
187              
188             For symmetry with the C function, this is also provided under the
189             name C since it behaves numerically.
190              
191             =cut
192              
193             sub max_by(&@)
194             {
195 9     9 1 24 my $code = shift;
196              
197 9 100       33 return unless @_;
198              
199 8         13 local $_;
200              
201 8         15 my @maximal = $_ = shift @_;
202 8         22 my $max = $code->( $_ );
203              
204 8         30 foreach ( @_ ) {
205 11         20 my $this = $code->( $_ );
206 11 100 100     55 if( $this > $max ) {
    100          
207 6         11 @maximal = $_;
208 6         11 $max = $this;
209             }
210             elsif( wantarray and $this == $max ) {
211 1         4 push @maximal, $_;
212             }
213             }
214              
215 8 100       45 return wantarray ? @maximal : $maximal[0];
216             }
217              
218             *nmax_by = \&max_by;
219              
220             =head2 min_by
221              
222             $optimal = min_by { KEYFUNC } @vals
223              
224             @optimal = min_by { KEYFUNC } @vals
225              
226             Similar to C but returns values which give the numerically smallest
227             result from the key function. Also provided as C
228              
229             =cut
230              
231             sub min_by(&@)
232             {
233 9     9 1 28 my $code = shift;
234              
235 9 100       34 return unless @_;
236              
237 8         9 local $_;
238              
239 8         21 my @minimal = $_ = shift @_;
240 8         19 my $min = $code->( $_ );
241              
242 8         34 foreach ( @_ ) {
243 12         20 my $this = $code->( $_ );
244 12 50 100     82 if( $this < $min ) {
    100          
245 0         0 @minimal = $_;
246 0         0 $min = $this;
247             }
248             elsif( wantarray and $this == $min ) {
249 1         3 push @minimal, $_;
250             }
251             }
252              
253 8 100       51 return wantarray ? @minimal : $minimal[0];
254             }
255              
256             *nmin_by = \&min_by;
257              
258             =head2 uniq_by
259              
260             @vals = uniq_by { KEYFUNC } @vals
261              
262             Returns a list of the subset of values for which the key function block
263             returns unique values. The first value yielding a particular key is chosen,
264             subsequent values are rejected.
265              
266             my @some_fruit = uniq_by { $_->colour } @fruit;
267              
268             To select instead the last value per key, reverse the input list. If the order
269             of the results is significant, don't forget to reverse the result as well:
270              
271             my @some_fruit = reverse uniq_by { $_->colour } reverse @fruit;
272              
273             Because the values returned by the key function are used as hash keys, they
274             ought to either be strings, or at least well-behaved as strings (such as
275             numbers, or object references which overload stringification in a suitable
276             manner).
277              
278             =cut
279              
280             sub uniq_by(&@)
281             {
282 7     7 1 23 my $code = shift;
283              
284 7         47 my %present;
285             return grep {
286 7         21 my $key = $code->( local $_ = $_ );
  13         34  
287 13         98 !$present{$key}++
288             } @_;
289             }
290              
291             =head2 partition_by
292              
293             %parts = partition_by { KEYFUNC } @vals
294              
295             Returns a key/value list of ARRAY refs containing all the original values
296             distributed according to the result of the key function block. Each value will
297             be an ARRAY ref containing all the values which returned the string from the
298             key function, in their original order.
299              
300             my %balls_by_colour = partition_by { $_->colour } @balls;
301              
302             Because the values returned by the key function are used as hash keys, they
303             ought to either be strings, or at least well-behaved as strings (such as
304             numbers, or object references which overload stringification in a suitable
305             manner).
306              
307             =cut
308              
309             sub partition_by(&@)
310             {
311 7     7 1 24 my $code = shift;
312              
313 7         9 my %parts;
314 7         23 push @{ $parts{ $code->( local $_ = $_ ) } }, $_ for @_;
  13         57  
315              
316 7         84 return %parts;
317             }
318              
319             =head2 count_by
320              
321             %counts = count_by { KEYFUNC } @vals
322              
323             I
324              
325             Returns a key/value list of integers, giving the number of times the key
326             function block returned the key, for each value in the list.
327              
328             my %count_of_balls = count_by { $_->colour } @balls;
329              
330             Because the values returned by the key function are used as hash keys, they
331             ought to either be strings, or at least well-behaved as strings (such as
332             numbers, or object references which overload stringification in a suitable
333             manner).
334              
335             =cut
336              
337             sub count_by(&@)
338             {
339 5     5 1 23 my $code = shift;
340              
341 5         5 my %counts;
342 5         26 $counts{ $code->( local $_ = $_ ) }++ for @_;
343              
344 5         80 return %counts;
345             }
346              
347             =head2 zip_by
348              
349             @vals = zip_by { ITEMFUNC } \@arr0, \@arr1, \@arr2,...
350              
351             Returns a list of each of the values returned by the function block, when
352             invoked with values from across each each of the given ARRAY references. Each
353             value in the returned list will be the result of the function having been
354             invoked with arguments at that position, from across each of the arrays given.
355              
356             my @transposition = zip_by { [ @_ ] } @matrix;
357              
358             my @names = zip_by { "$_[1], $_[0]" } \@firstnames, \@surnames;
359              
360             print zip_by { "$_[0] => $_[1]\n" } [ keys %hash ], [ values %hash ];
361              
362             If some of the arrays are shorter than others, the function will behave as if
363             they had C in the trailing positions. The following two lines are
364             equivalent:
365              
366             zip_by { f(@_) } [ 1, 2, 3 ], [ "a", "b" ]
367             f( 1, "a" ), f( 2, "b" ), f( 3, undef )
368              
369             The item function is called by C, so if it returns a list, the entire
370             list is included in the result. This can be useful for example, for generating
371             a hash from two separate lists of keys and values
372              
373             my %nums = zip_by { @_ } [qw( one two three )], [ 1, 2, 3 ];
374             # %nums = ( one => 1, two => 2, three => 3 )
375              
376             (A function having this behaviour is sometimes called C, e.g. in
377             Haskell, but that name would not fit the naming scheme used by this module).
378              
379             =cut
380              
381             sub zip_by(&@)
382             {
383 7     7 1 5566 my $code = shift;
384              
385 7 100       28 @_ or return;
386              
387 6         8 my $len = 0;
388 6   100     35 scalar @$_ > $len and $len = scalar @$_ for @_;
389              
390             return map {
391 6         21 my $idx = $_;
  14         36  
392 14         21 $code->( map { $_[$_][$idx] } 0 .. $#_ )
  26         60  
393             } 0 .. $len-1;
394             }
395              
396             =head2 unzip_by
397              
398             $arr0, $arr1, $arr2, ... = unzip_by { ITEMFUNC } @vals
399              
400             I
401              
402             Returns a list of ARRAY references containing the values returned by the
403             function block, when invoked for each of the values given in the input list.
404             Each of the returned ARRAY references will contain the values returned at that
405             corresponding position by the function block. That is, the first returned
406             ARRAY reference will contain all the values returned in the first position by
407             the function block, the second will contain all the values from the second
408             position, and so on.
409              
410             my ( $firstnames, $lastnames ) = unzip_by { m/^(.*?) (.*)$/ } @names;
411              
412             If the function returns lists of differing lengths, the result will be padded
413             with C in the missing elements.
414              
415             This function is an inverse of C, if given a corresponding inverse
416             function.
417              
418             =cut
419              
420             sub unzip_by(&@)
421             {
422 5     5 1 22 my $code = shift;
423              
424 5         5 my @ret;
425 5         19 foreach my $idx ( 0 .. $#_ ) {
426 12         37 my @slice = $code->( local $_ = $_[$idx] );
427 12 100       75 $#slice = $#ret if @slice < @ret;
428 12         57 $ret[$_][$idx] = $slice[$_] for 0 .. $#slice;
429             }
430              
431 5         44 return @ret;
432             }
433              
434             =head2 extract_by
435              
436             @vals = extract_by { SELECTFUNC } @arr
437              
438             I
439              
440             Removes elements from the referenced array on which the selection function
441             returns true, and returns a list containing those elements. This function is
442             similar to C, except that it modifies the referenced array to remove the
443             selected values from it, leaving only the unselected ones.
444              
445             my @red_balls = extract_by { $_->color eq "red" } @balls;
446              
447             # Now there are no red balls in the @balls array
448              
449             This function modifies a real array, unlike most of the other functions in this
450             module. Because of this, it requires a real array, not just a list.
451              
452             This function is implemented by invoking C on the array, not by
453             constructing a new list and assigning it. One result of this is that weak
454             references will not be disturbed.
455              
456             extract_by { !defined $_ } @refs;
457              
458             will leave weak references weakened in the C<@refs> array, whereas
459              
460             @refs = grep { defined $_ } @refs;
461              
462             will strengthen them all again.
463              
464             =cut
465              
466             sub extract_by(&\@)
467             {
468 6     6 1 738 my $code = shift;
469 6         10 my ( $arrref ) = @_;
470              
471 6         8 my @ret;
472 6         7 for( my $idx = 0; ; $idx++ ) {
473 43 100       147 last if $idx > $#$arrref;
474 37 100       70 next unless $code->( local $_ = $arrref->[$idx] );
475              
476 11         79 push @ret, splice @$arrref, $idx, 1, ();
477 11         16 redo;
478             }
479              
480 6         35 return @ret;
481             }
482              
483             =head2 extract_first_by
484              
485             $val = extract_first_by { SELECTFUNC } @arr
486              
487             I
488              
489             A hybrid between C and C. Removes the first
490             element from the referenced array on which the selection function returns
491             true, returning it.
492              
493             As with C, this function requires a real array and not just a
494             list, and is also implemented using C so that weak references are
495             not disturbed.
496              
497             If this function fails to find a matching element, it will return an empty
498             list in list context. This allows a caller to distinguish the case between
499             no matching element, and the first matching element being C.
500              
501             =cut
502              
503             sub extract_first_by(&\@)
504             {
505 2     2 1 710 my $code = shift;
506 2         3 my ( $arrref ) = @_;
507              
508 2         9 foreach my $idx ( 0 .. $#$arrref ) {
509 4 100       18 next unless $code->( local $_ = $arrref->[$idx] );
510              
511 1         15 return splice @$arrref, $idx, 1, ();
512             }
513              
514 1         7 return;
515             }
516              
517             =head2 weighted_shuffle_by
518              
519             @vals = weighted_shuffle_by { WEIGHTFUNC } @vals
520              
521             I
522              
523             Returns the list of values shuffled into a random order. The randomisation is
524             not uniform, but weighted by the value returned by the C. The
525             probabilty of each item being returned first will be distributed with the
526             distribution of the weights, and so on recursively for the remaining items.
527              
528             =cut
529              
530             sub weighted_shuffle_by(&@)
531             {
532 25     25 1 987 my $code = shift;
533 25         61 my @vals = @_;
534              
535 25         39 my @weights = map { $code->( local $_ = $_ ) } @vals;
  70         314  
536              
537 25         117 my @ret;
538 25         59 while( @vals > 1 ) {
539 46         44 my $total = 0; $total += $_ for @weights;
  46         112  
540 46         82 my $select = int rand $total;
541 46         564 my $idx = 0;
542 46         111 while( $select >= $weights[$idx] ) {
543 42         89 $select -= $weights[$idx++];
544             }
545              
546 46         87 push @ret, splice @vals, $idx, 1, ();
547 46         108 splice @weights, $idx, 1, ();
548             }
549              
550 25 100       56 push @ret, @vals if @vals;
551              
552 25         95 return @ret;
553             }
554              
555             =head2 bundle_by
556              
557             @vals = bundle_by { BLOCKFUNC } $number, @vals
558              
559             I
560              
561             Similar to a regular C functional, returns a list of the values returned
562             by C. Values from the input list are given to the block function in
563             bundles of C<$number>.
564              
565             If given a list of values whose length does not evenly divide by C<$number>,
566             the final call will be passed fewer elements than the others.
567              
568             =cut
569              
570             sub bundle_by(&@)
571             {
572 6     6 1 19 my $code = shift;
573 6         8 my $n = shift;
574              
575 6         7 my @ret;
576 6         25 for( my ( $pos, $next ) = ( 0, $n ); $pos < @_; $pos = $next, $next += $n ) {
577 13 100       55 $next = @_ if $next > @_;
578 13         39 push @ret, $code->( @_[$pos .. $next-1] );
579             }
580 6         61 return @ret;
581             }
582              
583             =head1 TODO
584              
585             =over 4
586              
587             =item * XS implementations
588              
589             These functions are currently all written in pure perl. Some at least, may
590             benefit from having XS implementations to speed up their logic.
591              
592             =item * Merge into L or L
593              
594             This module shouldn't really exist. The functions should instead be part of
595             one of the existing modules that already contain many list utility functions.
596             Having Yet Another List Utilty Module just worsens the problem.
597              
598             I have attempted to contact the authors of both of the above modules, to no
599             avail; therefore I decided it best to write and release this code here anyway
600             so that it is at least on CPAN. Once there, we can then see how best to merge
601             it into an existing module.
602              
603             I: As I am now the maintainer of L, some
604             amount of merging/copying should be possible. However, given the latter's key
605             position in the core F distribution and head of the "CPAN River" I am
606             keen not to do this wholesale, but a selected pick of what seems best, by a
607             popular consensus.
608              
609             =back
610              
611             =head1 AUTHOR
612              
613             Paul Evans
614              
615             =cut
616              
617             0x55AA;