| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package No::Sort; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5.002; |
|
4
|
1
|
|
|
1
|
|
634
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
41
|
|
|
5
|
1
|
|
|
1
|
|
5
|
use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION $DEBUG); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
966
|
|
|
6
|
|
|
|
|
|
|
require Exporter; |
|
7
|
|
|
|
|
|
|
@ISA=qw(Exporter); |
|
8
|
|
|
|
|
|
|
@EXPORT=qw(no_sort); |
|
9
|
|
|
|
|
|
|
@EXPORT_OK=qw(no_xfrm no_aa_xfrm |
|
10
|
|
|
|
|
|
|
latin1_uc latin1_lc latin1_ucfirst latin1_lcfirst); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
$VERSION = sprintf("%d.%02d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/); |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
No::Sort - Norwegian sorting |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use No::Sort; |
|
22
|
|
|
|
|
|
|
@sortert = no_sort @norske_ord; |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
This module provides the function no_sort(), which sorts ISO-8859/1 |
|
27
|
|
|
|
|
|
|
encoded strings according to Norwegian practice. The routine works |
|
28
|
|
|
|
|
|
|
like the normal perl sort routine, but the optional first argument is |
|
29
|
|
|
|
|
|
|
special. It can either be a reference to the strxfrm() function to |
|
30
|
|
|
|
|
|
|
use while sorting or a reference to a hash used to transform the words |
|
31
|
|
|
|
|
|
|
while sorting. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
You can also import the no_xfrm() function which is used for standard |
|
34
|
|
|
|
|
|
|
sorting. It can be useful to base your custom transformation function |
|
35
|
|
|
|
|
|
|
on it. If we for instance would like to sort "Aa" as "Å" we could |
|
36
|
|
|
|
|
|
|
implement it like this: |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use No::Sort qw(no_sort no_xfrm); |
|
39
|
|
|
|
|
|
|
sub my_xfrm { |
|
40
|
|
|
|
|
|
|
my $word = shift; |
|
41
|
|
|
|
|
|
|
$word =~ s/A[aA]/Å/g; |
|
42
|
|
|
|
|
|
|
$word =~ s/aa/å/g; |
|
43
|
|
|
|
|
|
|
no_xfrm($word); |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
@sorted = no_sort \&my_xfrm, @names; |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
By the way, the my_xfrm shown in this example can be imported from |
|
48
|
|
|
|
|
|
|
this module under the name 'no_aa_xfrm': |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
use No::Sort qw(no_sort no_aa_xfrm); |
|
51
|
|
|
|
|
|
|
@sorted = no_sort \&no_aa_xfrm, @names; |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
If you set the $No::Sort::DEBUG variable to a TRUE value, then we will |
|
54
|
|
|
|
|
|
|
make some extra noise on STDERR while sorting. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
The module can also export functions for up/down casing ISO-8859/1 |
|
57
|
|
|
|
|
|
|
strings. These functions are called latin1_uc(), latin1_lc(), |
|
58
|
|
|
|
|
|
|
latin1_ucfirst(), latin1_lcfirst(). |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
L |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head1 AUTHORS |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Hallvard B Furuseth , Gisle Aas |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=cut |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
sub no_sort { |
|
71
|
3
|
|
|
3
|
0
|
1140
|
my $xfrm; # ref to sort hash |
|
72
|
3
|
100
|
|
|
|
9
|
if (ref $_[0]) { |
|
73
|
1
|
50
|
|
|
|
5
|
if (ref($_[0]) eq "CODE") { |
|
|
|
0
|
|
|
|
|
|
|
74
|
1
|
|
|
|
|
2
|
my $code = shift; |
|
75
|
1
|
|
|
|
|
6
|
@{$xfrm}{@_} = map &$code($_), @_; |
|
|
1
|
|
|
|
|
14
|
|
|
76
|
|
|
|
|
|
|
} elsif (ref($_[0]) eq "HASH") { |
|
77
|
0
|
|
|
|
|
0
|
$xfrm = shift; |
|
78
|
|
|
|
|
|
|
} |
|
79
|
|
|
|
|
|
|
} |
|
80
|
3
|
100
|
|
|
|
15
|
@{$xfrm}{@_} = map no_xfrm($_), @_ unless $xfrm; |
|
|
2
|
|
|
|
|
50
|
|
|
81
|
|
|
|
|
|
|
|
|
82
|
3
|
100
|
|
|
|
12
|
if ($DEBUG) { |
|
83
|
2
|
50
|
|
|
|
7
|
my @s = sort { $xfrm->{$a} cmp $xfrm->{$b} || $a cmp $b } @_; |
|
|
106
|
|
|
|
|
217
|
|
|
84
|
2
|
|
|
|
|
331
|
printf STDERR "%-20s %s\n", "ORD", "SORTERES SOM"; |
|
85
|
2
|
|
|
|
|
374
|
print STDERR "-" x 20, " ", "-" x 40, "\n"; |
|
86
|
2
|
|
|
|
|
6
|
for (@s) { |
|
87
|
34
|
|
|
|
|
3509
|
printf STDERR "%-20s %s\n", $_, $xfrm->{$_}; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
2
|
|
|
|
|
31
|
return @s; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
|
|
92
|
1
|
50
|
|
|
|
8
|
sort { $xfrm->{$a} cmp $xfrm->{$b} || $a cmp $b } @_; |
|
|
85
|
|
|
|
|
167
|
|
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub no_xfrm { |
|
96
|
59
|
|
|
59
|
0
|
92
|
my $p1 = shift; |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# Ikke-alfanumeriske tegn regnes som en enkelt blank |
|
99
|
|
|
|
|
|
|
# (eller sikkert litt mer komplisert, f.eks whitespace -> blank, |
|
100
|
|
|
|
|
|
|
# punktum o.l -> et annet "lite" tegn, med blanke fjernet på begge |
|
101
|
|
|
|
|
|
|
# sider, osv... |
|
102
|
59
|
|
|
|
|
62
|
$p1 =~ tr/\0-\040\177\200-\240/ /s; |
|
103
|
59
|
50
|
|
|
|
103
|
$p1 =~ tr/ 0-9_A-Za-zÀ-ÖØ-ßà-öø-ÿ/,/cs |
|
104
|
|
|
|
|
|
|
and $p1 =~ s/,[ ,]+/,/g; |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# Plasser æøå i riktig rekkefølge. Tar med svensk äø også. |
|
107
|
|
|
|
|
|
|
# (Egentlig burde *alle* tegn transformeres slik at ting kommer i |
|
108
|
|
|
|
|
|
|
# riktig rekkefølge her, men da blir resten av programmet så uleselig...) |
|
109
|
59
|
|
|
|
|
55
|
$p1 =~ tr[æäøöåÆÄØÖÅ] |
|
110
|
|
|
|
|
|
|
[ååææøÅÅÆÆØ]; |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# Aksenter telles bare hvis uaksentede tegn er like |
|
113
|
59
|
|
|
|
|
53
|
my $p2 = $p1; |
|
114
|
59
|
|
|
|
|
52
|
$p2 =~ tr[ÀÁÂÃÄÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÙÚÛÜÝÞßàáâãäçèéêëìíîïðñòóôõöùúûüýþÿ] |
|
115
|
|
|
|
|
|
|
[AAAAÆCEEEEIIIIDNOOOOØUUUUYTSaaaaæceeeeiiiidnooooøuuuuyty]; |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Store & små bokstaver er bare forskjellig hvis alt annet er likt |
|
118
|
59
|
|
|
|
|
56
|
my $p3 = $p2; |
|
119
|
59
|
|
|
|
|
48
|
$p3 =~ tr[A-ZÆØÅ] |
|
120
|
|
|
|
|
|
|
[a-zæøå]; |
|
121
|
|
|
|
|
|
|
|
|
122
|
59
|
|
|
|
|
153
|
join("\1", $p3, $p2, $p1); |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub no_aa_xfrm { |
|
126
|
0
|
|
|
0
|
0
|
0
|
my $word = shift; |
|
127
|
0
|
|
|
|
|
0
|
$word =~ s/A[aA]/Å/g; |
|
128
|
0
|
|
|
|
|
0
|
$word =~ s/aa/å/g; |
|
129
|
0
|
|
|
|
|
0
|
no_xfrm($word); |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Some additional case conversion routines that do not really have |
|
133
|
|
|
|
|
|
|
# much to do with sorting. |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub latin1_lc |
|
136
|
|
|
|
|
|
|
{ |
|
137
|
2
|
|
|
2
|
0
|
131
|
my $str = shift; |
|
138
|
2
|
|
|
|
|
6
|
$str =~ tr[A-ZÀ-ÖØ-Þ] |
|
139
|
|
|
|
|
|
|
[a-zà-öø-þ]; |
|
140
|
2
|
|
|
|
|
7
|
$str; |
|
141
|
|
|
|
|
|
|
} |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub latin1_uc |
|
144
|
|
|
|
|
|
|
{ |
|
145
|
2
|
|
|
2
|
0
|
254
|
my $str = shift; |
|
146
|
2
|
|
|
|
|
6
|
$str =~ tr[a-zà-öø-þ] |
|
147
|
|
|
|
|
|
|
[A-ZÀ-ÖØ-Þ]; |
|
148
|
2
|
|
|
|
|
8
|
$str; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub latin1_ucfirst |
|
152
|
|
|
|
|
|
|
{ |
|
153
|
1
|
|
|
1
|
0
|
126
|
my $str = shift; |
|
154
|
1
|
|
|
|
|
7
|
$str =~ s/(.)/latin1_uc($1)/es; |
|
|
1
|
|
|
|
|
6
|
|
|
155
|
1
|
|
|
|
|
5
|
$str; |
|
156
|
|
|
|
|
|
|
} |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub latin1_lcfirst |
|
159
|
|
|
|
|
|
|
{ |
|
160
|
1
|
|
|
1
|
0
|
228
|
my $str = shift; |
|
161
|
1
|
|
|
|
|
7
|
$str =~ s/(.)/latin1_lc($1)/es; |
|
|
1
|
|
|
|
|
3
|
|
|
162
|
1
|
|
|
|
|
5
|
$str; |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
1; |