File Coverage

blib/lib/Sort/filevercmp.pm
Criterion Covered Total %
statement 73 75 97.3
branch 40 48 83.3
condition 32 39 82.0
subroutine 10 10 100.0
pod 2 2 100.0
total 157 174 90.2


line stmt bran cond sub pod time code
1             package Sort::filevercmp;
2              
3 1     1   598 use strict;
  1         3  
  1         34  
4 1     1   20 use warnings;
  1         4  
  1         38  
5 1     1   7 use Exporter 'import';
  1         3  
  1         1370  
6              
7             our $VERSION = '0.001';
8              
9             our @EXPORT = 'filevercmp';
10             our @EXPORT_OK = 'fileversort';
11              
12 55     55 1 28098 sub filevercmp ($$) { _filevercmp(_parse($_[0]), _parse($_[1])) }
13              
14             sub fileversort {
15 1     1 1 599 my @parsed = map { _parse($_) } @_;
  56         103  
16 1         9 return @_[sort { _filevercmp($parsed[$a], $parsed[$b]) } 0..$#_];
  55         95  
17             }
18              
19             # Parse strings into metadata
20             sub _parse {
21 166     166   333 my ($name) = @_;
22 166 50       384 $name = '' unless defined $name;
23            
24 166 100 100     910 return { name => $name, special => 1 } if $name eq '' or $name eq '.' or $name eq '..';
      100        
25            
26 158         252 my %meta;
27 158         307 $meta{name} = $name;
28            
29 158         392 $meta{hidden} = $name =~ s/^\.//;
30            
31 158         253 my (@prefix_parts, @all_parts);
32            
33             # Parse name into pairs of non-digit and digit parts
34 158         247 my $with_suffix = $name;
35 158   100     895 while ($with_suffix =~ s/^([^0-9]*)([0-9]*)// and (length $1 or length $2)) {
      66        
36 431         2759 push @all_parts, $1, $2;
37             }
38            
39 158         336 $meta{all_parts} = \@all_parts;
40            
41             # Parse name into pairs without suffix
42 158         232 my $prefix = $name;
43 158 50       745 if ($prefix =~ s/(?:\.[A-Za-z~][A-Za-z0-9~]*)*$//) {
44 158         257 my $without_suffix = $prefix;
45 158   100     696 while ($without_suffix =~ s/^([^0-9]*)([0-9]*)// and (length $1 or length $2)) {
      66        
46 320         2146 push @prefix_parts, $1, $2;
47             }
48             } else {
49 0         0 @prefix_parts = @all_parts;
50             }
51            
52 158         341 $meta{prefix} = $prefix;
53 158         240 $meta{prefix_parts} = \@prefix_parts;
54            
55 158         406 return \%meta;
56             }
57              
58             # tilde sorts first even before end of string, then letters, then everything else
59             sub _lexorder {
60 1172     1172   1875 my ($char) = @_;
61 1172 50       2647 return 0 if $char =~ m/\A[0-9]\z/;
62 1172 100       2846 return ord $char if $char =~ m/\A[a-zA-Z]\z/;
63 342 100       692 return -1 if $char eq '~';
64 322         525 return ord($char) + ord('z') + 1;
65             }
66              
67             sub _lexcmp {
68 150     150   252 my ($alex, $blex) = @_;
69 150         428 my @achars = split '', $alex;
70 150         336 my @bchars = split '', $blex;
71 150   100     389 while (@achars or @bchars) {
72 612         1117 my ($achar, $bchar) = (shift(@achars), shift(@bchars));
73 612 100       1375 my $aord = defined $achar ? _lexorder($achar) : 0;
74 612 100       1337 my $bord = defined $bchar ? _lexorder($bchar) : 0;
75 612         892 my $charcmp = $aord <=> $bord;
76 612 100       1873 return $charcmp if $charcmp;
77             }
78 64         130 return 0;
79             }
80              
81             # Based on verrevcmp() from GNU filevercmp
82             sub _verrevcmp {
83 102 50   102   138 my @aparts = @{$_[0] || []};
  102         367  
84 102 50       171 my @bparts = @{$_[1] || []};
  102         340  
85 102   66     266 while (@aparts or @bparts) {
86             # Lexical part
87 150         309 my ($alex, $blex) = (shift(@aparts), shift(@bparts));
88 150 100       332 $alex = '' unless defined $alex;
89 150 100       301 $blex = '' unless defined $blex;
90 150         299 my $lexcmp = _lexcmp($alex, $blex);
91 150 100       548 return $lexcmp if $lexcmp;
92            
93             # Numeric part
94 64         128 my ($anum, $bnum) = (shift(@aparts), shift(@bparts));
95 64 50 33     244 $anum = 0 unless defined $anum and length $anum;
96 64 50 33     223 $bnum = 0 unless defined $bnum and length $bnum;
97 64         123 my $numcmp = $anum <=> $bnum;
98 64 100       252 return $numcmp if $numcmp;
99             }
100 0         0 return 0;
101             }
102              
103             # Based on filevercmp() from GNU filevercmp
104             sub _filevercmp {
105 110     110   200 my ($first, $second) = @_;
106 110 50       265 return 0 if $first->{name} eq $second->{name};
107            
108             # Special files go first (empty string, ., or ..)
109             return $first->{name} cmp $second->{name}
110 110 100 100     320 if $first->{special} and $second->{special};
111 106 100       229 return -1 if $first->{special};
112 105 100       214 return 1 if $second->{special};
113            
114             # Hidden files go before unhidden
115 104 100 100     282 return -1 if $first->{hidden} and !$second->{hidden};
116 103 100 100     389 return 1 if !$first->{hidden} and $second->{hidden};
117            
118             # Compare parts, including suffixes only if prefixes are equal
119 102 100       224 if ($first->{prefix} eq $second->{prefix}) {
120 18         38 return _verrevcmp($first->{all_parts}, $second->{all_parts});
121             } else {
122 84         175 return _verrevcmp($first->{prefix_parts}, $second->{prefix_parts});
123             }
124             }
125              
126             1;
127              
128             =head1 NAME
129              
130             Sort::filevercmp - Sort version strings as in GNU filevercmp
131              
132             =head1 SYNOPSIS
133              
134             use Sort::filevercmp;
135             my @sorted = sort filevercmp 'foo-bar-1.2a.tar.gz', 'foo-bar-1.10.zip';
136             my $cmp = filevercmp 'a1b2c3.tar', 'a1b2c3.tar~';
137             say $cmp ? $cmp < 0 ? 'First name' : 'Second name' : 'Names are equal';
138            
139             # Pre-parse list for faster sorting
140             use Sort::filevercmp 'fileversort';
141             my @sorted = fileversort @filenames;
142              
143             =head1 DESCRIPTION
144              
145             Perl implementation of the C function from
146             L. C is used by the
147             L (C<-V> option) and L (C<-v> option) GNU coreutils commands
148             for "natural" sorting of strings (usually filenames) containing mixed version
149             numbers and filename suffixes.
150              
151             =head1 FUNCTIONS
152              
153             =head2 filevercmp
154              
155             my $cmp = filevercmp $string1, $string2;
156             my @sorted = sort filevercmp @strings;
157              
158             Takes two strings and returns -1 if the first string sorts first, 1 if the
159             second string sorts first, or 0 if the strings sort equivalently. Can be passed
160             to L directly as a comparison function. Exported by
161             default.
162              
163             =head2 fileversort
164              
165             my @sorted = fileversort @strings;
166              
167             Takes a list of strings and sorts them according to L. The
168             strings are pre-parsed so it may be more efficient than using L
169             as a sort comparison function. Exported by request.
170              
171             =head1 ALGORITHM
172              
173             The sort algorithm works roughly as follows:
174              
175             =over
176              
177             =item 1
178              
179             Empty strings, C<.>, and C<..> go first
180              
181             =item 2
182              
183             Hidden files (strings beginning with C<.>) go next, and are sorted among
184             themselves according to the remaining rules
185              
186             =item 3
187              
188             Each string is split into sequences of non-digit characters and digit (C<0-9>)
189             characters, ignoring any filename suffix as matched by the regex
190             C, unless the strings to be compared are
191             equal with the suffixes removed.
192              
193             =item 4
194              
195             The first non-digit sequence of the first string is compared lexically with
196             that of the second string, with letters (C) sorting first and other
197             characters sorting after, ordered by character ordinals. The tilde (C<~>)
198             character sorts before all other characters, even the end of the sequence.
199             Continue if the non-digit sequences are lexically equal.
200              
201             =item 5
202              
203             The first digit sequence of the first string is compared numerically with that
204             of the second string, ignoring leading zeroes. Continue if the digit sequences
205             are numerically equal.
206              
207             =item 6
208              
209             Repeat steps 4 and 5 with the remaining sequences.
210              
211             =back
212              
213             =head1 CAVEATS
214              
215             This sort algorithm ignores the current locale, and has unique rules for
216             lexically sorting the non-digit components of the strings, designed for sorting
217             filenames. There are better options for general version string sorting; see
218             L.
219              
220             =head1 BUGS
221              
222             Report any issues on the public bugtracker.
223              
224             =head1 AUTHOR
225              
226             Dan Book
227              
228             =head1 COPYRIGHT AND LICENSE
229              
230             This software is Copyright (c) 2017 by Dan Book.
231              
232             This is free software, licensed under:
233              
234             The Artistic License 2.0 (GPL Compatible)
235              
236             =head1 SEE ALSO
237              
238             =over
239              
240             =item *
241              
242             L - for comparing Perl version strings
243              
244             =item *
245              
246             L - for comparing standard version strings
247              
248             =item *
249              
250             L - locale-sensitive natural sorting of mixed strings
251              
252             =back