File Coverage

blib/lib/Data/Sorting.pm
Criterion Covered Total %
statement 173 232 74.5
branch 80 156 51.2
condition 27 48 56.2
subroutine 37 48 77.0
pod 8 8 100.0
total 325 492 66.0


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Data::Sorting - Multi-key sort using function results
4              
5              
6             =head1 SYNOPSIS
7              
8             use Data::Sorting qw( :basics :arrays :extras );
9            
10             # Sorting functions default to simple string comparisons
11             @names = qw( Bob Alice Ellen Charlie David );
12             @ordered = sorted_by( undef, @names );
13            
14             # Various options can be passed before the list values
15             @ordered = sorted_by( [ -order=>'reverse' ], @names );
16              
17             # You can also generate a sorting function and then apply it
18             $function = sort_function();
19             @ordered = $function->( @names ); # or &{$function}(@names)
20             @ordered = sort_function( -order=>'reverse' )->( @names );
21            
22             # The :array functions are prototyped to take the array first
23             @ordered = sorted_array( @names );
24             @ordered = sorted_arrayref( \@names );
25              
26             # You can also sort an array in place, changing its internal order
27             sort_array( @names );
28             sort_arrayref( \@names );
29            
30             # There are several sorting options, such as -compare => 'natural'
31             @movies = ( 'The Matrix', 'Plan 9', '2001', 'Terminator 2' );
32             @ordered = sort_function( -compare => 'natural' )->( @movies );
33             # @ ordered now contains '2001', 'The Matrix', 'Plan 9', 'Terminator 2'
34            
35             # To sort numbers, pass the -compare => 'numeric' option
36             @numbers = ( 18, 5, 23, 42, 156, 91, 64 );
37             @ordered = sorted_by( [ -compare => 'numeric' ], @numbers );
38             @ordered = sort_function( -compare => 'numeric' )->( @numbers );
39             @ordered = sorted_array( @numbers, -compare => 'numeric' );
40             sort_array( @numbers, -compare => 'numeric' );
41            
42             # You can sort by the results of a function to be called on each item
43             sort_array( @numbers, -compare => 'numeric', sub { $_[0] % 16 } );
44             # @numbers now contains 64, 18, 5, 23, 42, 91, 156
45            
46             # For arrays of datastructures, pass in keys to extract for sorting
47             @records = (
48             { 'rec_id'=>3, 'name'=>{'first'=>'Bob', 'last'=>'Macy'} },
49             { 'rec_id'=>1, 'name'=>{'first'=>'Sue', 'last'=>'Jones'} },
50             { 'rec_id'=>2, 'name'=>{'first'=>'Al', 'last'=>'Jones' } },
51             );
52             @ordered = sorted_array( @records, 'rec_id' );
53              
54             # For nested data structures, pass an array of keys to fetch
55             @ordered = sorted_array( @records, ['name','first'] );
56              
57             # Pass multiple sort keys for multiple-level sorts
58             @ordered = sorted_array( @records, ['name','last'], ['name','first'] );
59            
60             # Any selected sort options are applied to all subsequent sort keys
61             @ordered = sorted_array( @records,
62             -order => 'reverse', ['name','last'], ['name','first'] );
63            
64             # Options specified within a hash-ref apply only to that key
65             @ordered = sorted_array( @records,
66             { order=>'reverse', sortkey=>['name','last'] },
67             ['name','first'] );
68            
69             # Locale support is available if you have Perl 5.004 or later and POSIX
70             POSIX::setlocale( POSIX::LC_COLLATE(), 'en_US' );
71             POSIX::setlocale( POSIX::LC_CTYPE(), 'en_US' );
72             @ordered = sorted_array( @records,
73             -compare=>'locale', ['name','last'], ['name','first'] );
74              
75              
76             =head1 ABSTRACT
77              
78             Data::Sorting provides functions to sort the contents of arrays based on a collection of extraction and comparison rules. Extraction rules are used to identify the attributes of array elements on which the ordering is based; comparison rules specify how those values should be ordered.
79              
80             Index strings may be used to retrieve values from array elements, or function references may be passed in to call on each element. Comparison rules are provided for numeric, bytewise, and case-insensitive orders, as well as a 'natural' comparison that places numbers first, in numeric order, followed by the remaining items in case-insensitive textual order.
81              
82              
83             =head1 DESCRIPTION
84              
85             This module provides several public functions with different calling interfaces that all use the same underlying sorting mechanisms.
86              
87             These functions may be imported individually or in groups using the following tags:
88              
89             =over 9
90              
91             =item :basics
92              
93             sorted_by(), sort_function(): General-purpose sorting functions.
94              
95             =item :array
96              
97             sorted_array(), sorted_arrayref(), sort_array(), sort_arrayref(): Prototyped functions for arrays.
98              
99             =item :extras
100              
101             sort_key_values(), sort_description(): Two accessory functions that explain how sorting is being carried out.
102              
103             =back
104              
105             All of these functions take a list of sorting rules as arguments. See L<"Sort Rule Syntax"> for a discussion of the contents of the $sort_rule or @sort_rules parameters shown below.
106              
107             =cut
108              
109             ########################################################################
110              
111             package Data::Sorting;
112              
113             require 5.003;
114 7     7   182863 use strict;
  7         17  
  7         276  
115 7     7   38 use Carp;
  7         14  
  7         730  
116 7     7   42 use Exporter;
  7         19  
  7         401  
117              
118 7     7   33 use vars qw( $VERSION @ISA %EXPORT_TAGS );
  7         25  
  7         1229  
119             $VERSION = 0.9;
120              
121             push @ISA, qw( Exporter );
122             %EXPORT_TAGS = (
123             basics => [qw( sorted_by sort_function )],
124             arrays => [qw( sorted_array sorted_arrayref sort_array sort_arrayref)],
125             extras => [qw( sort_key_values sort_description )],
126             );
127             Exporter::export_ok_tags( keys %EXPORT_TAGS );
128              
129 7     7   41 use vars qw( @Array @Rules $PreCalculate $Rule @ValueSet );
  7         29  
  7         5333  
130              
131             ########################################################################
132              
133             =head2 sorted_by
134              
135             @ordered = sorted_by( $sort_rule, @value_array );
136             @ordered = sorted_by( $sort_rule, @$value_arrayref );
137             @ordered = sorted_by( $sort_rule, $value1, $value2, $value3 );
138              
139             @ordered = sorted_by( \@sort_rules, @value_array );
140             @ordered = sorted_by( \@sort_rules, @$value_arrayref );
141             @ordered = sorted_by( \@sort_rules, $value1, $value2, $value3 );
142              
143             This is a general-purpose sorting function which accepts one or more sort order rules and a list of input values, then returns the values in the order specified by the rules.
144              
145             =cut
146              
147             # @in_order = sorted_by( $sort_rules_ary, @values );
148             sub sorted_by ($;@) {
149 3         10 my @sort_params = ( ! defined $_[0] ) ? () :
150 3 50   3 1 2073 ( ref($_[0]) eq 'ARRAY' ) ? @{ (shift) } :
    50          
151             shift;
152 3         10 ( my $sorter, local @Rules ) = _parse_sort_args( @sort_params );
153 3         35 local *Array = \@_;
154 3         9 &$sorter;
155             }
156              
157             ########################################################################
158              
159             =head2 sort_function
160              
161             @ordered = sort_function( @sort_rules )->( @value_array );
162             @ordered = sort_function( @sort_rules )->( @$value_arrayref );
163             @ordered = sort_function( @sort_rules )->( $value1, $value2, $value3 );
164              
165             Creates an anonymous function which applies the provided sort rules. The function may be cached and used multiple times to apply the same rules again.
166              
167             =cut
168              
169             # @in_order = sort_function( @sort_rules )->( @array );
170             sub sort_function (@) {
171 40     40 1 31625 my ( $sorter, @rules ) = _parse_sort_args( @_ );
172             return sub {
173 410     410   18999 local *Array = \@_;
174 410         1098 local @Rules = @rules;
175 410         689 my @results = &$sorter;
176             # Kludge to clear extracted data; there's gotta be a better way...
177 410         1100 foreach my $rule (@rules) {
178 498         3039 map { delete $rule->{$_} } grep /^ext_/, keys %$rule
  256         1000  
179             }
180 410         2251 @results;
181             }
182 40         261 }
183              
184             ########################################################################
185              
186             =head2 sorted_array
187              
188             @ordered = sorted_array( @value_array, @sort_rules );
189             @ordered = sorted_array( @$value_arrayref, @sort_rules );
190              
191             Returns a sorted list of the items without altering the order of the original list.
192              
193             =cut
194              
195             # @in_order = sorted_array( @array, @sort_rules );
196             sub sorted_array (\@;@) {
197 2     2 1 750 local *Array = shift;
198 2         6 ( my $sorter, local @Rules ) = _parse_sort_args( @_ );
199 2         8 &$sorter;
200             }
201              
202             =head2 sorted_arrayref
203              
204             @ordered = sorted_arrayref( \@value_array, @sort_rules );
205             @ordered = sorted_arrayref( $value_arrayref, @sort_rules );
206              
207             Returns a sorted list of the items without altering the order of the original list.
208              
209             =cut
210              
211             # @in_order = sorted_arrayref( $array_ref, @sort_rules );
212             sub sorted_arrayref ($;@) {
213 2     2 1 694 local *Array = shift;
214 2         5 ( my $sorter, local @Rules ) = _parse_sort_args( @_ );
215 2         5 &$sorter;
216             }
217              
218             ########################################################################
219              
220             =head2 sort_array
221              
222             sort_array( @value_array, @sort_rules );
223             sort_array( @$value_arrayref, @sort_rules );
224              
225             Sorts the contents of the specified array using a list of sorting rules.
226              
227             =cut
228              
229             # sort_array( @array, @sort_rules );
230             sub sort_array (\@;@) {
231 2     2 1 652 local *Array = shift;
232 2         5 ( my $sorter, local @Rules ) = _parse_sort_args( @_ );
233 2         20 @Array = &$sorter;
234             }
235              
236             =head2 sort_arrayref
237              
238             sort_arrayref( \@value_array, @sort_rules );
239             sort_arrayref( $value_arrayref, @sort_rules );
240              
241             Equivalent to sort_array, but takes an explicit array reference as its first argument, rather than an array variable.
242              
243             =cut
244              
245             # sort_arrayref( $array_ref, @sort_rules );
246             sub sort_arrayref ($;@) {
247 2     2 1 619 local *Array = shift;
248 2         5 ( my $sorter, local @Rules ) = _parse_sort_args( @_ );
249 2         5 @Array = &$sorter;
250             }
251              
252             ########################################################################
253              
254             =head2 sort_key_values
255              
256             @key_values = sort_key_values( \@value_array, @sort_rules );
257             @key_values = sort_key_values( $value_arrayref, @sort_rules );
258              
259             Doesn't actually perform any sorting. Extracts and returns the values which would be used as sort keys from each item in the array, in their original order.
260              
261             =cut
262              
263             # @results = sort_key_values( $array, @sort_rules );
264             sub sort_key_values ($;@) {
265 0     0 1 0 local *Array = shift;
266 0         0 my ($sorter, @rules) = _parse_sort_args( @_ );
267            
268 0 0       0 if ( scalar @rules == 1 ) {
269 0         0 _extract_values_for_rule( $rules[0], @Array );
270             } else {
271 0         0 map [ _extract_values_for_item( $_, @rules ) ], @Array;
272             }
273             }
274              
275             ########################################################################
276              
277             =head2 sort_description
278              
279             @description = sort_description( $descriptor, @sort_rules );
280              
281             Doesn't actually perform any sorting. Provides descriptive information about the sort rules for diagnostic purposes.
282              
283             =cut
284              
285             # @sort_rules = sort_description( 'text', @sort_rules );
286             sub sort_description ($;@) {
287 0     0 1 0 my $descriptor = shift;
288              
289 0         0 my $desc_func;
290 0 0       0 if ( ! $descriptor ) {
    0          
    0          
291 0         0 $desc_func = \&_desc_text;
292             } elsif ( ref($descriptor) eq 'CODE' ) {
293 0         0 $desc_func = $descriptor;
294             } elsif ( ! ref($descriptor) ) {
295 7     7   49 no strict 'refs';
  7         13  
  7         3947  
296 0 0       0 $desc_func = \&{"_desc_$descriptor"}
  0         0  
297             or croak("Can't find a function named '_desc_$descriptor'");
298             } else {
299 0         0 croak("Unsupported descriptor '$descriptor'")
300             }
301            
302 0         0 my ($sorter, @rules) = _parse_sort_args( @_ );
303            
304 0         0 map { &$desc_func( $_ ) } @rules;
  0         0  
305             }
306              
307             sub _desc_text {
308 0     0   0 my $rule = shift;
309              
310 0         0 my $comp = $rule->{compare};
311            
312 0         0 $rule->{extract} .
313              
314 0 0       0 join( '', map $_ ? "($_) " : " ", join(', ', map "'$_'", @{ $rule->{extract_args} }) ) .
    0          
    0          
    0          
    0          
315              
316             "in " . ( $rule->{order_sign} < 0 ? "descending" : "ascending" ) . " " .
317            
318             ( ! ref($comp) ? "$comp" :
319             ref($comp) eq 'CODE' ? "with custom function ($comp)":
320             ref($comp) eq 'ARRAY' ? join(', ', @$comp) : "with $comp" ) .
321             " order"
322             }
323              
324             ########################################################################
325              
326             =head2 Sort Rule Syntax
327              
328             The sort rule argument list may contain several different types of parameters, which are parsed identically by all of the public functions described above.
329              
330             A sort rule definition list may contain any combination of the following argument structures:
331              
332             =over 4
333              
334             =item I
335              
336             If no sort keys are specified, a default sort key is created using the C "self"> option.
337              
338             @ordered = sorted_array( @names );
339              
340             =item I
341              
342             Specifies a sort key. Each I may be either a scalar value, or an array reference. Appropriate values for a I vary depending on which "extract" option is being used, and are discussed further below.
343              
344             @ordered = sorted_array( @numbers, sub { $_[0] % 8 } );
345             @ordered = sorted_array( @records, 'rec_id' );
346             @ordered = sorted_array( @records, ['name','first'] );
347              
348             Any number of sortkeys may be provided:
349              
350             @ordered = sorted_array( @records, ['name','last'],
351             ['name','first'] );
352              
353             =item -sortkey => I
354              
355             Another way of specifying a sort key is by preceding it with the "-sortkey" flag.
356              
357             @ordered = sorted_array( @numbers, -sortkey => sub { $_[0] % 8 } );
358             @ordered = sorted_array( @records, -sortkey => ['name','last'],
359             -sortkey => ['name','first'] );
360              
361             =item { sortkey => I, I
362              
363             Additional options can be specified by passing a reference to a hash containing a sortkey and values for any number of options described in the list below.
364              
365             @ordered = sorted_array( @numbers, { sortkey => sub { abs(shift) },
366             compare => 'numeric', } );
367              
368             =item -I
369              
370             Sets a default option for any subsequent sortkeys in the argument list.
371              
372             @ordered = sorted_array( @records, -compare => 'numeric',
373             -sortkey => sub { abs(shift) });
374              
375             @ordered = sorted_array( @records, -compare => 'textual',
376             -sortkey => ['name','last'],
377             -sortkey => ['name','first'] );
378              
379             =back
380              
381             The possible I
382              
383             =over 4
384              
385             =item extract
386              
387             Determines the function which will be used to retrieve the sort key value from each item in the input list.
388              
389             =item compare
390              
391             Determines the function which will be used to order the extracted values.
392              
393             =item order
394              
395             Can be set to "reverse" or "descending" to invert the sort order. Defaults to "ascending".
396              
397             =item engine
398              
399             Determines the underlying sorting algorithm which will be used to implement the sort. Generally left blank, enabling the module to select the best one available.
400              
401             =back
402              
403             Each of these options is discussed at further length below.
404              
405             =cut
406              
407             my @DefaultState = ( order=>'ascending', compare=>'cmp', extract=>'any' );
408             my %SupportedOptions = ( map { $_=>1 } qw( engine order compare extract ) );
409             my %FunctionCache;
410              
411             sub _parse_sort_args {
412 51     51   123 my @arguments = ( @_ );
413            
414 51         73 my %state;
415             my @rules;
416 51         159 while ( scalar @arguments ) {
417 71         106 my $token = shift @arguments;
418            
419 71         402 my ( $flagname ) = ( $token =~ /^\-(\w+)$/ );
420 71 100 66     444 if ( $flagname and $SupportedOptions{$flagname} ) {
    50          
    100          
421 36         138 $state{ $flagname } = shift @arguments;
422             } elsif ( $flagname eq 'sortkey' ) {
423 0         0 push @rules, { @DefaultState, %state, 'sortkey' => shift @arguments };
424             } elsif ( ref($token) eq 'HASH' ) {
425 1         10 push @rules, { @DefaultState, %state, %$token };
426             } else {
427 34         263 push @rules, { @DefaultState, %state, 'sortkey' => $token };
428             }
429             }
430 51 100       142 if ( ! scalar @rules ) {
431 24         181 push @rules, { @DefaultState, 'extract' => 'self', %state, sortkey => [] };
432             }
433              
434 7     7   48 no strict 'refs';
  7         18  
  7         15805  
435              
436 51         107 foreach my $rule ( @rules ) {
437             # Select the appropriate comparison function
438 59         139 my $compare = $rule->{compare};
439 59 50       140 croak("Missing compare option for sorting") unless ( $compare );
440             $rule->{compare_func} = ref($compare) eq 'CODE' ? $compare :
441 59 50 33     296 $FunctionCache{"_cmp_$compare"} ||= \&{"_cmp_$compare"}
      66        
442             || croak("Can't find a function named '_cmp_$compare'");
443            
444             # Optional parameter for "reverse" or "descending" sorts
445 59 100       339 $rule->{order_sign} = ( $rule->{order} =~ /^desc|^rev/i ) ? -1 : 1;
446            
447             # Select the appropriate value extraction function
448 59         103 my $extract = $rule->{extract};
449 59 50       149 croak("Missing extract option for sorting") unless ( length $extract );
450 59 100 100     244 $extract = 'code' if ($extract eq 'any' && ref($rule->{sortkey}) eq 'CODE');
451             $rule->{extract_func} = ref($extract) eq 'CODE' ? $extract :
452 59 50 33     1114 $FunctionCache{"_ext_$extract"} ||= \&{"_ext_$extract"} ||
      66        
453             croak("Can't find a function named '_ext_$extract'");
454            
455             # Optional array of arguments to the extraction function
456 59         237 my $skey = $rule->{sortkey};
457 59 100       325 $rule->{extract_args} = ( ! defined $skey ) ? [] :
    50          
458             (ref($skey) eq 'ARRAY') ? $skey :
459             [ $skey ];
460            
461 59 100       262 if ( $extract eq 'compound' ) {
462 1         2 foreach ( 0 .. $#{ $rule->{extract_args} } / 2 ) {
  1         6  
463 2         6 my $xa = $rule->{extract_args}->[ $_ * 2 ];
464 2 50       6 if ( ! ref $xa ) {
465             $rule->{extract_args}->[$_ * 2] = $FunctionCache{"_ext_$xa"} ||=
466 2   0     29 \&{"_ext_$xa"} || croak("Can't find a function named '_ext_$xa'");
      33        
467             }
468             }
469             }
470             }
471            
472             # If $PreCalculate is set, do our lookups ahead of time for all of the items
473 58 100       392 my $engine = defined($PreCalculate) ? 'precalc' :
474             $rules[0]->{engine} ? $rules[0]->{engine} :
475             ( @rules == 1 and $rules[0]->{order_sign} > 0
476             and $rules[0]->{compare} eq 'cmp'
477             and $rules[0]->{extract} eq 'self' ) ? 'trivial' :
478 51 100 100     612 (! grep {$_->{compare} ne 'cmp' or $_->{order_sign} < 0} @rules) ? 'packed' :
    100          
    100          
    50          
    50          
479             ( scalar @rules == 1 ) ? 'precalc' :
480             'orcish' ;
481             # warn "Sorting using '$engine' engine\n";
482            
483             my $sorter = ref($engine) eq 'CODE' ? $engine :
484 51 50 33     250 $FunctionCache{"_sorted_$engine"} ||= \&{"_sorted_$engine"} ||
      66        
485             croak("No such sort mode '$engine'; can't find function '_sorted_$engine'");
486            
487 51         209 return $sorter, @rules;
488             }
489              
490             ########################################################################
491              
492             =head2 Extraction Functions
493              
494             For the extract option, you may specify one of the following Is:
495              
496             =over 4
497              
498             =item any
499              
500             The default. Based on the I may behave as the 'self', 'key', or 'method' options described below.
501              
502             =item self
503              
504             Uses the input value as the sort key, unaltered. Typically used when sorting strings or other scalar values.
505              
506             =item key
507              
508             Allows for indexing in to hash or array references, allowing you to sort a list of arrayrefs based on the Ith value in each, or to sort a list of hashrefs based on a given key.
509              
510             If the sortkey is an array reference, then the keys are looked up sequentially, allowing you to sort on the contents of a nested hash or array structure.
511              
512             =item method
513              
514             Uses the sortkey as a method name to be called on each list value, enabling you to sort objects by some calculated value.
515              
516             If the sortkey is an array reference, then the first value is used as the method name and the remaining values as arguments to that method.
517              
518             =item I
519              
520             You may pass in a reference to a custom extraction function that will be used to retrieve the sort key values for this rule. The function will be called separately for each value in the input list, receiving that current value as an argument.
521              
522             If the sortkey is an array reference, then the first value is used as the function reference and the remaining values as arguments to be passed after the item value.
523              
524             =back
525              
526             extract => self | method | key | code | CODEREF | ...
527             sortkey => - | m.name | key/idx | CODEREF | args
528              
529             =cut
530              
531             # $value = _extract_value( $item, $rule );
532             sub _extract_value {
533 352     352   4533 my ( $item, $rule ) = @_;
534 352         356 my $value = &{ $rule->{extract_func} }( $item, @{ $rule->{extract_args} } );
  352         948  
  352         882  
535 352 50       1121 return defined($value) ? $value : '';
536             }
537              
538             # $value = _extract_values_for_item( $item, @rules );
539             sub _extract_values_for_item {
540 0     0   0 my $item = shift;
541 0 0       0 map { defined($_) ? $_ : '' }
  0         0  
542 0         0 map { &{ $_->{extract_func} }( $item, @{ $_->{extract_args} } ) } @_;
  0         0  
  0         0  
543             }
544              
545             # $value = _extract_values_for_rule( $rule, @item );
546             sub _extract_values_for_rule {
547 113     113   138 my $rule = shift;
548 113 100       905 return @_ if ( $rule->{extract} eq 'self' );
549 231 50       613 map { defined($_) ? $_ : '' }
  231         417  
550 22         47 map { &{ $rule->{extract_func} }( $_, @{ $rule->{extract_args} } ) } @_;
  231         900  
  231         631  
551             }
552              
553             sub _ext_self {
554 0     0   0 my ( $item, @sortkey ) = @_;
555 0         0 return $item;
556             }
557              
558             sub _ext_split {
559 264     264   485 my ( $item, $delim, @indexes ) = @_;
560             # warn "Split '$item' with '$delim'\n";
561 264         1001 my @values = split /$delim/, $item;
562 264         1336 join $delim, @values[ @indexes ];
563             }
564              
565             sub _ext_substr {
566 88     88   146 my ( $item, @sortkey ) = @_;
567 88 50       393 $#sortkey ? substr($item, $sortkey[0], $sortkey[1] ) : substr($item, $sortkey[0] );
568             }
569              
570             sub _ext_self_code {
571 88     88   107 my ( $item, @sortkey ) = @_;
572 88         171 &$item( @sortkey );
573             }
574              
575             sub _ext_code {
576 275     275   461 my ( $item, $code, @sortkey ) = @_;
577 275         535 &$code( $item, @sortkey );
578             }
579              
580             sub _ext_method {
581 132     132   201 my ( $item, $method, @sortkey ) = @_;
582 132         321 $item->$method( @sortkey );
583             }
584              
585             sub _ext_index {
586 968     968   1418 my ( $item, @sortkey ) = @_;
587 968         2695 while ( scalar @sortkey ) {
588 1188         1311 my $index = shift @sortkey;
589              
590 1188 50       4235 if ( ! ref $item ) {
    100          
    50          
591 0         0 return;
592             } elsif ( UNIVERSAL::isa($item, 'HASH') ) {
593 924         3066 $item = $item->{$index};
594             } elsif ( UNIVERSAL::isa($item, 'ARRAY') ) {
595 264 50 66     794 carp "Use of non-numeric key '$index'"
596             unless ( $index eq '0' or $index != 0 );
597 264         761 $item = $item->[$index];
598             } else {
599 0         0 carp "Can't _ext_index from '$item' ($index)";
600             }
601              
602             }
603 968         3162 return $item;
604             }
605              
606             sub _ext_any {
607 1100     1100   1681 my ( $item, @sortkey ) = @_;
608            
609 1100 50       5454 if ( ref($item) eq 'CODE' ) {
    50          
    50          
    100          
    50          
610             # &_ext_self_code;
611 0         0 &$item( @sortkey );
612             } elsif ( ! scalar @sortkey ) {
613 0         0 return $item;
614             } elsif ( ref($sortkey[0]) eq 'CODE' ) {
615 0         0 &_ext_code;
616             } elsif ( UNIVERSAL::can( $item, $sortkey[0] ) ) {
617 132         188 &_ext_method;
618             } elsif ( ! ref $sortkey[0] ) {
619 968         1446 &_ext_index;
620             } else {
621 0         0 confess "Unsure how to extract value for sorting purposes";
622             }
623             }
624              
625             sub _ext_compound {
626 44     44   196 my $item = shift;
627 44         218 while ( scalar @_ ) {
628 88         125 my ($extr_sub, $sortkey) = ( shift, shift );
629 88 50       185 $item = &$extr_sub( $item, $sortkey ? @$sortkey : () );
630             }
631 44         163 return $item;
632             }
633              
634             ########################################################################
635              
636             =head2 Comparison Functions
637              
638             For the compare option, you may specify one of the following Is:
639              
640             =over 4
641              
642             =item cmp
643              
644             The default comparison, using Perl's default cmp operator.
645              
646             =item numeric
647              
648             A numeric comparison using Perl's <=> operator.
649              
650             =item textual
651              
652             A text-oriented comparison that ignores whitespace and capitalization.
653              
654             =item natural
655              
656             A multi-type comparison that places empty values first, then numeric values in numeric order, then non-textual values like punctuation, followed by textual values in text order. The natural ordering also includes moving subsidiary words to the end, eg "The Book of Verse" is sorted as "Book of Verse, The"
657              
658             =item locale : $three_way_cmp
659              
660             Comparator functions which use the POSIX strcoll function for ordering.
661              
662             =item lc_locale : $three_way_cmp
663              
664             A case-insensitive version of the POSIX strcoll ordering.
665              
666             =item num_lc_locale
667              
668             Like the 'natural' style, this comparison distinguishes between empty and numeric values, but uses the lc_locale function to sort the textual values.
669              
670             =item I
671              
672             You may pass in a reference to a custom comparison function that will be used to order the sort key values for this rule.
673              
674             =back
675              
676             Each of these functions may return a postive, zero, or negative value based on the relationship of the values in the $a and $b positions of the current @ValueSet array. An undefined return indicates that the comparator is unable to provide an ordering for this pair, in which case the choice will fall through to the next comparator in the list; if no comparator specifies an order, they are left in their original order.
677              
678             =cut
679              
680             # $three_way_cmp = _cmp_cmp;
681             sub _cmp_cmp {
682 342     342   931 $ValueSet[$a] cmp $ValueSet[$b]
683             }
684              
685             # $three_way_cmp = _cmp_bytewise;
686             sub _cmp_bytewise {
687 1358     1358   2981 $ValueSet[$a] cmp $ValueSet[$b]
688             }
689              
690             # $three_way_cmp = _cmp_numeric;
691             sub _cmp_numeric {
692 2110     2110   5804 $ValueSet[$a] <=> $ValueSet[$b]
693             }
694              
695             # $three_way_cmp = _cmp_empty_first;
696             sub _cmp_empty_first {
697             # If neither is empty, we have no opinion.
698             # If only one is empty, place it first
699             # If they're both empty, they're equivalent
700 0 0   0   0 ( ! length($ValueSet[$a]) )
    0          
    0          
701             ? ( ( ! length($ValueSet[$b]) ) ? 0 : -1 )
702             : ( ( ! length($ValueSet[$b]) ) ? 1 : undef );
703             }
704              
705             # $three_way_cmp = _cmp_numbers_first;
706             sub _cmp_numbers_first {
707             # Use an extra array to cache our converted value
708 0   0 0   0 $Rule->{'ext_numeric'} ||= [];
709 0         0 my $is_numeric = $Rule->{'ext_numeric'};
710              
711             # If we haven't already, check to see if the values are purely numeric
712 0 0       0 defined $is_numeric->[$a] or
713             $is_numeric->[$a] = ( $ValueSet[$a] =~ /\A\-?(?:\d*\.)?\d+\Z/ );
714 0 0       0 defined $is_numeric->[$b] or
715             $is_numeric->[$b] = ( $ValueSet[$b] =~ /\A\-?(?:\d*\.)?\d+\Z/ );
716            
717             # If they're both numeric, use numeric comparison,
718             # If one's numeric and the other isn't, put the number first
719             # If neither is numeric, we have no opinion
720 0 0       0 ( $is_numeric->[$a] )
    0          
    0          
721             ? ( ( $is_numeric->[$b] ) ? ( $ValueSet[$a] <=> $ValueSet[$b] ) : -1 )
722             : ( ( $is_numeric->[$b] ) ? 1 : undef );
723             }
724              
725             # $three_way_cmp = _cmp_textual;
726             sub _cmp_textual {
727             # Use an extra array to cache our converted value
728 0   0 0   0 $Rule->{'ext_textual'} ||= [];
729 0         0 my $mangled = $Rule->{'ext_textual'};
730            
731             # If we haven't already, generate a lower-case, alphanumeric-only value
732 0         0 foreach my $idx ( $a, $b ) {
733 0 0       0 next if defined $mangled->[$idx];
734 0         0 local $_ = lc( $ValueSet[$idx] );
735 0         0 tr/0-9a-z/ /cs;
736 0         0 s/\A\s+//;
737 0         0 s/\s+\Z//;
738 0         0 $mangled->[$idx] = $_
739             }
740            
741             # If both items have an alphanumeric value, compare them on that basis
742             # If one is alphanumeric and the other is punctuation/empty, put alpha last.
743 0 0       0 ( length($mangled->[$a]) )
    0          
    0          
744             ? ( length($mangled->[$b]) ? ( $mangled->[$a] cmp $mangled->[$b] ) : -1 )
745             : ( length($mangled->[$b]) ? 1 : undef );
746             }
747              
748             # $three_way_cmp = _cmp_locale
749             sub _cmp_locale {
750 0     0   0 require POSIX;
751 0         0 POSIX::strcoll( $ValueSet[$a], $ValueSet[$b] );
752             }
753              
754             # $three_way_cmp = _cmp_lc_locale
755             sub _cmp_lc_locale {
756 0     0   0 require POSIX;
757 0         0 POSIX::strcoll( lc($ValueSet[$a]), lc($ValueSet[$b]) );
758             }
759              
760             sub _cmp_make_compound {
761 7     7   18 my @comparators = @_;
762             sub {
763 0     0   0 foreach my $comparator ( @comparators ) {
764             # Call each comparison function in an attempt to establish an ordering
765 0         0 my $rc = &$comparator;
766             # If the comparator returns undef, it has no opinion; call the next one
767 0 0       0 return($rc) if defined($rc);
768             }
769             }
770 7         39 }
771              
772             {
773 7     7   82 no strict 'refs';
  7         35  
  7         8504  
774             *{'_cmp_num_lc_locale'} = _cmp_make_compound( \&_cmp_empty_first, \&_cmp_numbers_first, \&_cmp_lc_locale );
775             }
776              
777             # $three_way_cmp = _cmp_natural;
778             sub _cmp_natural {
779              
780             # If neither is empty, we have no opinion.
781             # If only one is empty, place it first
782             # If they're both empty, they're equivalent
783 2304 50   2304   5523 ( ! length($ValueSet[$a]) )
    100          
    100          
784             ? ( ( ! length($ValueSet[$b]) ) ? return 0 : return -1 )
785             : ( ( ! length($ValueSet[$b]) ) ? return 1 : undef );
786              
787             # Use an extra array to cache our converted value
788 2258   100     4270 $Rule->{'ext_numeric'} ||= [];
789 2258         2839 my $is_numeric = $Rule->{'ext_numeric'};
790              
791             # If we haven't already, check to see if the values are purely numeric
792 2258 100       5256 defined $is_numeric->[$a] or
793             $is_numeric->[$a] = ( $ValueSet[$a] =~ /\A\-?(?:\d*\.)?\d+\Z/ );
794 2258 100       4948 defined $is_numeric->[$b] or
795             $is_numeric->[$b] = ( $ValueSet[$b] =~ /\A(?:\d*\.)?\d+\Z/ );
796            
797             # If they're both numeric, use numeric comparison,
798             # If one's numeric and the other isn't, put the number first
799             # If neither is numeric, we have no opinion
800 2258 100       7350 ( $is_numeric->[$a] )
    100          
    100          
801             ? return( ( $is_numeric->[$b] ) ? ( $ValueSet[$a] <=> $ValueSet[$b] ) : -1 )
802             : ( ( $is_numeric->[$b] ) ? return 1 : undef );
803            
804             # Use an extra array to cache our converted value
805 487   100     938 $Rule->{'ext_textual'} ||= [];
806 487         685 my $mangled = $Rule->{'ext_textual'};
807            
808             # If we haven't already, generate a lower-case, alphanumeric-only value
809 487         624 foreach my $idx ( $a, $b ) {
810 974 100       2059 next if defined $mangled->[$idx];
811 220         419 local $_ = lc( $ValueSet[$idx] );
812 220         290 tr/0-9a-z/ /cs;
813 220         369 s/\A\s+//;
814 220         527 s/\s+\Z//;
815 220         266 s/\A(the)\s(.*)/$2 $1/;
816 220         528 $mangled->[$idx] = $_
817             }
818            
819             # If both items have an alphanumeric value, compare them on that basis
820             # If one is alphanumeric and the other is punctuation/empty, put alpha last.
821 487 50       2121 ( length($mangled->[$a]) )
    0          
    50          
822             ? ( length($mangled->[$b]) ? ( $mangled->[$a] cmp $mangled->[$b] ) : -1 )
823             : ( length($mangled->[$b]) ? 1 : undef );
824             }
825              
826             ########################################################################
827              
828             =head2 Ascending or Descending Order
829              
830             For the order option, you may specify one of the following Is:
831              
832             =over 4
833              
834             =item forward I ascending
835              
836             The default order, from lower values to higher ones.
837              
838             =item reverse I descending
839              
840             Reverses the ordering dictated by a sort rule.
841              
842             =back
843              
844              
845             =head2 Sorting Engines
846              
847             Depending on the specific sorting rules used in a given call, this module automatically selects an internal function that provides an appropriate approach to implementing the sort, called the sort "engine".
848              
849             You can override this selection by setting an "engine" option on the first sort key, which can either contain either the name of one of the engines, described below, or a CODEREF with equivalent behavior.
850              
851             =over 4
852              
853             =item trivial
854              
855             In the common case of sorting raw values with a cmp comparison, the fast-but-simple "trivial" engine is used, which simply applies Perl's default sorting.
856              
857             =item orcish
858              
859             For a complex multi-key sort the "orcish" engine is typically selected.
860              
861             =item precalc
862              
863             Used when there's only one sorting key.
864              
865             You may also set the $PreCalculate package variable to true to force this engine to be selected. Because the sort key values for the list are calculated before entering Perl's sort operation, there's less of a chance of possible re-entry problems due to nested uses of the sort operator, which causes a fatal error in at least some versions of Perl.
866              
867             =item packed
868              
869             Some sorts are handled with the Guttman-Rosler technique, extracting packed keys and using Perl's default sort function, which is substantially faster, but currently only a limited set of simple comparisons can be handled this way. (For more information on packed-default sorting, see http://www.sysarch.com/perl/sort_paper.html or search for "Guttman-Rosler".)
870              
871             =back
872              
873             =cut
874              
875             sub _sorted_trivial {
876 11     11   46 sort @Array
877             }
878              
879             sub _sorted_precalc {
880 113     113   182 foreach my $rule (@Rules) {
881 113         402 $rule->{ext_value} = [ _extract_values_for_rule( $rule, @Array ) ]
882             }
883 113         572 return @Array[ sort _sorted_indexes_precalc 0 .. $#Array ];
884             }
885              
886             # Compare indexes $a and $b acording to each of the specified rules
887             # $three_way_cmp = _sorted_indexes_precalc;
888             sub _sorted_indexes_precalc {
889             # implicit: $a, $b
890            
891 5822     5822   8259 RULE: foreach $Rule (@Rules) {
892 5822   50     12070 local *ValueSet = ( $Rule->{ext_value} ||= [] );
893            
894             # If the function returns zero or undef, the values are equivalent
895 5822 100       5856 my $rc = &{ $Rule->{compare_func} }
  5822         10767  
896             or next RULE;
897            
898             # Else return the comparison results, reversing them first if necessary
899 5745         11527 return $rc * $Rule->{order_sign};
900             }
901             # If the items are equivalent for all of the rules, don't change their order
902             # warn "Comparing $a and $b: '$ValueSet[$a]' " . ('=') . " '$ValueSet[$b]'\n";
903 77         153 return $a <=> $b;
904             }
905              
906             sub _sorted_orcish {
907 44     44   144 return @Array[ sort _sorted_indexes_orcish 0 .. $#Array ];
908             }
909              
910             sub _sorted_indexes_orcish {
911             # implicit: $a, $b
912            
913 204     204   314 RULE: foreach $Rule (@Rules) {
914             # If we haven't already, calculate the value of each item for this rule
915 292   100     1277 local *ValueSet = ( $Rule->{ext_value} ||= [] );
916 292 100       721 defined $ValueSet[$a] or $ValueSet[$a] = _extract_value($Array[$a], $Rule);
917 292 100       843 defined $ValueSet[$b] or $ValueSet[$b] = _extract_value($Array[$b], $Rule);
918            
919             # If the function returns zero or undef, the values are equivalent
920 292 100       290 my $rc = &{ $Rule->{compare_func} }
  292         515  
921             or next RULE;
922            
923             # Else return the comparison results, reversing them first if necessary
924 204         635 return $rc * $Rule->{order_sign};
925             }
926             # If the items are equivalent for all of the rules, don't change their order
927             # warn "Comparing $a and $b: '$ValueSet[$a]' " . ('=') . " '$ValueSet[$b]'\n";
928 0         0 return $a <=> $b;
929             }
930              
931             sub _sorted_packed {
932 253     253   260 my @packed;
933 253 100       615 if ( @Rules == 1 ) {
934 836         1727 @packed = map
935 209         428 &{ $Rules[0]->{extract_func} }( $Array[$_], @{ $Rules[0]->{extract_args} } )
  836         2776  
936             . "\0" . $_,
937             ( 0 .. $#Array );
938             } else {
939 176         246 @packed = map {
940 44         79 my $item = $Array[$_];
941 352         631 join( "\0",
942 176         240 map(&{ $_->{extract_func} }( $item, @{ $_->{extract_args} } ), @Rules),
  352         711  
943             $_
944             )
945             } ( 0 .. $#Array );
946             }
947            
948             # warn "Packed: " . join(', ', map "'$_'", @packed ) . "\n";
949            
950 253         3257 return @Array[ map substr($_, 1 + rindex $_, "\0"), sort @packed ];
951             }
952              
953             ########################################################################
954              
955             =head1 STATUS AND SUPPORT
956              
957             This release of Data::Sorting is intended for public review and feedback.
958              
959             Name DSLIP Description
960             -------------- ----- ---------------------------------------------
961             Data::
962             ::Sorting bdpfp Multi-key sort using function results
963              
964             Further information and support for this module is available at www.evoscript.org.
965              
966             Please report bugs or other problems to Ebugs@evoscript.comE.
967              
968             =head1 BUGS AND TO DO
969              
970             The following issues have been noted for future improvements:
971              
972             Convert more types of comparisons to packed-default sorts for speed.
973              
974             Further investigate the current status of the Sort::Records module.
975              
976             Add a comparator function for an alpha-numeric-spans sorting model
977             like Sort::Naturally.
978              
979             Interface to Sort::PolySort for alternate comparator styles, like
980             "name" and "usdate".
981              
982             For non-scalar values, compare referents along the lines of
983             Ref::cmpref().
984              
985             Provide better handling for nested sorts; perhaps throw an exception
986             from the inner instance to the outer, catch and set $PreCalculate,
987             then go back into the loop?
988              
989             Replace dynamic scoping with object instances for thread safety.
990             May not be necessary given changes in threading models.
991              
992             =head1 CREDITS AND COPYRIGHT
993              
994             =head2 Developed By
995              
996             M. Simon Cavalletto, simonm@cavalletto.org
997             Evolution Softworks, www.evoscript.org
998              
999             =head2 Copyright
1000              
1001             Copyright 2003 Matthew Cavalletto.
1002              
1003             Portions copyright 1996, 1997, 1998, 1999 Evolution Online Systems, Inc.
1004              
1005             =head2 License
1006              
1007             You may use, modify, and distribute this software under the same terms as Perl.
1008              
1009             =cut
1010              
1011             ########################################################################
1012              
1013             1;