| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
############################################################## |
|
2
|
|
|
|
|
|
|
# Text::StripAccents - remove non a-z chars from a string |
|
3
|
|
|
|
|
|
|
# and replace them with their a-z counterparts |
|
4
|
|
|
|
|
|
|
############################################################## |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Version information |
|
7
|
|
|
|
|
|
|
# =================== |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# 0.1 CC Apr 05 New module |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# 0.11 CC Jun 05 After feedback in cpanrating, |
|
12
|
|
|
|
|
|
|
# documented that the module is |
|
13
|
|
|
|
|
|
|
# latin1 only, and pp with no |
|
14
|
|
|
|
|
|
|
# prereqs |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
############################################################## |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
package Text::StripAccents; |
|
20
|
2
|
|
|
2
|
|
1005
|
use strict; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
71
|
|
|
21
|
2
|
|
|
2
|
|
82
|
use vars qw (@ISA $VERSION @EXPORT); |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
142
|
|
|
22
|
2
|
|
|
2
|
|
10
|
use Exporter (); |
|
|
2
|
|
|
|
|
15
|
|
|
|
2
|
|
|
|
|
1427
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
|
25
|
|
|
|
|
|
|
@EXPORT = qw(stripaccents); |
|
26
|
|
|
|
|
|
|
$VERSION="0.11"; |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
############################################################## |
|
29
|
|
|
|
|
|
|
=pod |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 NAME |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
Text::StripAccents - removes accented & special characters from strings |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
use Text::StripAccents; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $Stripaccent = Text::StripAccents->new(); |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $convertedString = $StripAccents->strip($unconvertedString); |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
OR |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
use Text::StripAccents; |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
stripaccents($string); |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
This simple module takes accented characters and replaces them with their anglicised ASCII counterparts, e.g. Ü becomes U. It currently ONLY supports Latin1. If there are any characters I've missed out that you think should be included, please mail me and I'll add them in. |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
This is a pure perl module with no prerequisites. |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 PREREQS |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
None. |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Text::Unaccent is a much more advanced utility to do the same job, but with a C dependency. |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 CHANGES |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
0.11 - bugfix to clarify the documentation, as per Dobrica Pavlinusic's feedback. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 LICENSE |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Copyright 2005 by Charles Colbourn, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 AUTHOR |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Charles Colbourn - charlesc@g0n.net |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
(Character mapping hash supplied by Nigel Currie). |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=cut |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
############################################################## |
|
81
|
|
|
|
|
|
|
# Text::Stripaccent::new - constructor |
|
82
|
|
|
|
|
|
|
############################################################## |
|
83
|
|
|
|
|
|
|
# |
|
84
|
|
|
|
|
|
|
# Takes as param the character set you are using. Latin1 |
|
85
|
|
|
|
|
|
|
# support only at present |
|
86
|
|
|
|
|
|
|
# |
|
87
|
|
|
|
|
|
|
# returns a Stripaccent object |
|
88
|
|
|
|
|
|
|
############################################################## |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub new |
|
91
|
|
|
|
|
|
|
{ |
|
92
|
1
|
|
|
1
|
0
|
12
|
my $class = shift; |
|
93
|
1
|
|
|
|
|
2
|
my $charset = shift; |
|
94
|
|
|
|
|
|
|
|
|
95
|
1
|
|
|
|
|
2
|
my %object; |
|
96
|
1
|
|
|
|
|
4
|
return bless \%object,$class; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
############################################################### |
|
100
|
|
|
|
|
|
|
# Text::Stripaccent::strip |
|
101
|
|
|
|
|
|
|
############################################################### |
|
102
|
|
|
|
|
|
|
# |
|
103
|
|
|
|
|
|
|
# Removes all accented chars from a string and replaces them |
|
104
|
|
|
|
|
|
|
# with their unaccented equivalents. |
|
105
|
|
|
|
|
|
|
# |
|
106
|
|
|
|
|
|
|
# takes a string as a param, returns a converted string |
|
107
|
|
|
|
|
|
|
# |
|
108
|
|
|
|
|
|
|
############################################################### |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub strip |
|
111
|
|
|
|
|
|
|
{ |
|
112
|
|
|
|
|
|
|
|
|
113
|
2
|
|
|
2
|
0
|
9
|
my $object = shift; |
|
114
|
2
|
|
|
|
|
3
|
my $string = shift; |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
|
117
|
2
|
|
|
|
|
196
|
my %IsoLatin1ToASCIITable = ("A" => "A", "À" => "A", "Á" => "A", "Â" => "A", |
|
118
|
|
|
|
|
|
|
"Ã" => "A", "Ä" => "A", "Å" => "A", |
|
119
|
|
|
|
|
|
|
"B" => "B", |
|
120
|
|
|
|
|
|
|
"C" => "C", "Ç" => "C", |
|
121
|
|
|
|
|
|
|
"D" => "D", |
|
122
|
|
|
|
|
|
|
"E" => "E", "È" => "E", "É" => "E", "Ê" => "E", |
|
123
|
|
|
|
|
|
|
"Ë" => "E", |
|
124
|
|
|
|
|
|
|
"F" => "F", |
|
125
|
|
|
|
|
|
|
"G" => "G", |
|
126
|
|
|
|
|
|
|
"H" => "H", |
|
127
|
|
|
|
|
|
|
"I" => "I", "Ì" => "I", "Í" => "I", "Î" => "I", |
|
128
|
|
|
|
|
|
|
"Ï" => "I", |
|
129
|
|
|
|
|
|
|
"J" => "J", |
|
130
|
|
|
|
|
|
|
"K" => "K", |
|
131
|
|
|
|
|
|
|
"L" => "L", |
|
132
|
|
|
|
|
|
|
"M" => "M", |
|
133
|
|
|
|
|
|
|
"N" => "N", "Ñ" => "N", |
|
134
|
|
|
|
|
|
|
"O" => "O", "Ò" => "O", "Ó" => "O", "Ô" => "O", |
|
135
|
|
|
|
|
|
|
"Õ" => "O", "Ö" => "O", |
|
136
|
|
|
|
|
|
|
"P" => "P", |
|
137
|
|
|
|
|
|
|
"Q" => "Q", |
|
138
|
|
|
|
|
|
|
"R" => "R", |
|
139
|
|
|
|
|
|
|
"S" => "S", |
|
140
|
|
|
|
|
|
|
"T" => "T", |
|
141
|
|
|
|
|
|
|
"U" => "U", "Ù" => "U", "Ú" => "U", "Û" => "U", |
|
142
|
|
|
|
|
|
|
"Ü" => "U", |
|
143
|
|
|
|
|
|
|
"V" => "V", |
|
144
|
|
|
|
|
|
|
"W" => "W", |
|
145
|
|
|
|
|
|
|
"X" => "X", |
|
146
|
|
|
|
|
|
|
"Y" => "Y", "Y" => "Y", |
|
147
|
|
|
|
|
|
|
"Z" => "Z", |
|
148
|
|
|
|
|
|
|
"a" => "a", "à" => "a", "á" => "a", "â" => "a", |
|
149
|
|
|
|
|
|
|
"ã" => "a", "ä" => "a", "å" => "a", |
|
150
|
|
|
|
|
|
|
"b" => "b", |
|
151
|
|
|
|
|
|
|
"c" => "c", "ç" => "c", |
|
152
|
|
|
|
|
|
|
"d" => "d", |
|
153
|
|
|
|
|
|
|
"e" => "e", "è" => "e", "é" => "e", "ê" => "e", |
|
154
|
|
|
|
|
|
|
"ë" => "e", |
|
155
|
|
|
|
|
|
|
"f" => "f", |
|
156
|
|
|
|
|
|
|
"g" => "g", |
|
157
|
|
|
|
|
|
|
"h" => "h", |
|
158
|
|
|
|
|
|
|
"i" => "i", "ì" => "i", "í" => "i", "î" => "i", |
|
159
|
|
|
|
|
|
|
"ï" => "i", |
|
160
|
|
|
|
|
|
|
"j" => "j", |
|
161
|
|
|
|
|
|
|
"k" => "k", |
|
162
|
|
|
|
|
|
|
"l" => "l", |
|
163
|
|
|
|
|
|
|
"m" => "m", |
|
164
|
|
|
|
|
|
|
"n" => "n", "ñ" => "n", |
|
165
|
|
|
|
|
|
|
"o" => "o", "ò" => "o", "ó" => "o", "ô" => "o", |
|
166
|
|
|
|
|
|
|
"õ" => "o", "ö" => "o", |
|
167
|
|
|
|
|
|
|
"p" => "p", |
|
168
|
|
|
|
|
|
|
"q" => "q", |
|
169
|
|
|
|
|
|
|
"r" => "r", |
|
170
|
|
|
|
|
|
|
"s" => "s", |
|
171
|
|
|
|
|
|
|
"t" => "t", |
|
172
|
|
|
|
|
|
|
"u" => "u", "ù" => "u", "ú" => "u", "û" => "u", |
|
173
|
|
|
|
|
|
|
"ü" => "u", |
|
174
|
|
|
|
|
|
|
"v" => "v", |
|
175
|
|
|
|
|
|
|
"w" => "w", |
|
176
|
|
|
|
|
|
|
"x" => "x", |
|
177
|
|
|
|
|
|
|
"y" => "y", "y" => "y", "ý" => "y", |
|
178
|
|
|
|
|
|
|
"z" => "z", |
|
179
|
|
|
|
|
|
|
"ß"=>"ss"); |
|
180
|
|
|
|
|
|
|
|
|
181
|
2
|
|
|
|
|
374
|
my @stringArray = split //,$string; |
|
182
|
2
|
|
|
|
|
39
|
foreach (@stringArray) |
|
183
|
|
|
|
|
|
|
{ |
|
184
|
972
|
100
|
|
|
|
1490
|
if ($IsoLatin1ToASCIITable{$_}) |
|
185
|
|
|
|
|
|
|
{ |
|
186
|
818
|
|
|
|
|
870
|
$_ = $IsoLatin1ToASCIITable{$_}; |
|
187
|
|
|
|
|
|
|
} |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
|
|
190
|
2
|
|
|
|
|
52
|
my $returnString = join '',@stringArray; |
|
191
|
|
|
|
|
|
|
|
|
192
|
2
|
|
|
|
|
107
|
return $returnString; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
################################################################ |
|
196
|
|
|
|
|
|
|
# stripaccent - function to call ::strip in non OO mode |
|
197
|
|
|
|
|
|
|
################################################################ |
|
198
|
|
|
|
|
|
|
sub stripaccents |
|
199
|
|
|
|
|
|
|
{ |
|
200
|
1
|
|
|
1
|
0
|
12
|
my $string = shift; |
|
201
|
1
|
|
|
|
|
10
|
return __PACKAGE__->strip($string); |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
1; |
|
207
|
|
|
|
|
|
|
|