File Coverage

blib/lib/Text/StripAccents.pm
Criterion Covered Total %
statement 24 24 100.0
branch 2 2 100.0
condition n/a
subroutine 6 6 100.0
pod 0 3 0.0
total 32 35 91.4


line stmt bran cond sub pod time code
1             ##############################################################
2             # Text::StripAccents - remove non a-z chars from a string
3             # and replace them with their a-z counterparts
4             ##############################################################
5             #
6             # Version information
7             # ===================
8             #
9             # 0.1 CC Apr 05 New module
10             #
11             # 0.11 CC Jun 05 After feedback in cpanrating,
12             # documented that the module is
13             # latin1 only, and pp with no
14             # prereqs
15             #
16             ##############################################################
17              
18              
19             package Text::StripAccents;
20 2     2   1005 use strict;
  2         3  
  2         71  
21 2     2   82 use vars qw (@ISA $VERSION @EXPORT);
  2         5  
  2         142  
22 2     2   10 use Exporter ();
  2         15  
  2         1427  
23              
24             @ISA = qw(Exporter);
25             @EXPORT = qw(stripaccents);
26             $VERSION="0.11";
27              
28             ##############################################################
29             =pod
30              
31             =head1 NAME
32              
33             Text::StripAccents - removes accented & special characters from strings
34              
35             =head1 SYNOPSIS
36              
37             use Text::StripAccents;
38              
39             my $Stripaccent = Text::StripAccents->new();
40              
41             my $convertedString = $StripAccents->strip($unconvertedString);
42              
43             OR
44              
45             use Text::StripAccents;
46              
47             stripaccents($string);
48              
49             =head1 DESCRIPTION
50              
51             This simple module takes accented characters and replaces them with their anglicised ASCII counterparts, e.g. Ü becomes U. It currently ONLY supports Latin1. If there are any characters I've missed out that you think should be included, please mail me and I'll add them in.
52              
53             This is a pure perl module with no prerequisites.
54              
55             =head1 PREREQS
56              
57             None.
58              
59             =head1 SEE ALSO
60              
61             Text::Unaccent is a much more advanced utility to do the same job, but with a C dependency.
62              
63             =head1 CHANGES
64              
65             0.11 - bugfix to clarify the documentation, as per Dobrica Pavlinusic's feedback.
66              
67             =head1 LICENSE
68              
69             Copyright 2005 by Charles Colbourn, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself.
70              
71             =head1 AUTHOR
72              
73             Charles Colbourn - charlesc@g0n.net
74              
75             (Character mapping hash supplied by Nigel Currie).
76              
77             =cut
78              
79              
80             ##############################################################
81             # Text::Stripaccent::new - constructor
82             ##############################################################
83             #
84             # Takes as param the character set you are using. Latin1
85             # support only at present
86             #
87             # returns a Stripaccent object
88             ##############################################################
89              
90             sub new
91             {
92 1     1 0 12 my $class = shift;
93 1         2 my $charset = shift;
94              
95 1         2 my %object;
96 1         4 return bless \%object,$class;
97             }
98              
99             ###############################################################
100             # Text::Stripaccent::strip
101             ###############################################################
102             #
103             # Removes all accented chars from a string and replaces them
104             # with their unaccented equivalents.
105             #
106             # takes a string as a param, returns a converted string
107             #
108             ###############################################################
109              
110             sub strip
111             {
112              
113 2     2 0 9 my $object = shift;
114 2         3 my $string = shift;
115              
116              
117 2         196 my %IsoLatin1ToASCIITable = ("A" => "A", "À" => "A", "Á" => "A", "Â" => "A",
118             "Ã" => "A", "Ä" => "A", "Å" => "A",
119             "B" => "B",
120             "C" => "C", "Ç" => "C",
121             "D" => "D",
122             "E" => "E", "È" => "E", "É" => "E", "Ê" => "E",
123             "Ë" => "E",
124             "F" => "F",
125             "G" => "G",
126             "H" => "H",
127             "I" => "I", "Ì" => "I", "Í" => "I", "Î" => "I",
128             "Ï" => "I",
129             "J" => "J",
130             "K" => "K",
131             "L" => "L",
132             "M" => "M",
133             "N" => "N", "Ñ" => "N",
134             "O" => "O", "Ò" => "O", "Ó" => "O", "Ô" => "O",
135             "Õ" => "O", "Ö" => "O",
136             "P" => "P",
137             "Q" => "Q",
138             "R" => "R",
139             "S" => "S",
140             "T" => "T",
141             "U" => "U", "Ù" => "U", "Ú" => "U", "Û" => "U",
142             "Ü" => "U",
143             "V" => "V",
144             "W" => "W",
145             "X" => "X",
146             "Y" => "Y", "Y" => "Y",
147             "Z" => "Z",
148             "a" => "a", "à" => "a", "á" => "a", "â" => "a",
149             "ã" => "a", "ä" => "a", "å" => "a",
150             "b" => "b",
151             "c" => "c", "ç" => "c",
152             "d" => "d",
153             "e" => "e", "è" => "e", "é" => "e", "ê" => "e",
154             "ë" => "e",
155             "f" => "f",
156             "g" => "g",
157             "h" => "h",
158             "i" => "i", "ì" => "i", "í" => "i", "î" => "i",
159             "ï" => "i",
160             "j" => "j",
161             "k" => "k",
162             "l" => "l",
163             "m" => "m",
164             "n" => "n", "ñ" => "n",
165             "o" => "o", "ò" => "o", "ó" => "o", "ô" => "o",
166             "õ" => "o", "ö" => "o",
167             "p" => "p",
168             "q" => "q",
169             "r" => "r",
170             "s" => "s",
171             "t" => "t",
172             "u" => "u", "ù" => "u", "ú" => "u", "û" => "u",
173             "ü" => "u",
174             "v" => "v",
175             "w" => "w",
176             "x" => "x",
177             "y" => "y", "y" => "y", "ý" => "y",
178             "z" => "z",
179             "ß"=>"ss");
180              
181 2         374 my @stringArray = split //,$string;
182 2         39 foreach (@stringArray)
183             {
184 972 100       1490 if ($IsoLatin1ToASCIITable{$_})
185             {
186 818         870 $_ = $IsoLatin1ToASCIITable{$_};
187             }
188             }
189              
190 2         52 my $returnString = join '',@stringArray;
191              
192 2         107 return $returnString;
193             }
194              
195             ################################################################
196             # stripaccent - function to call ::strip in non OO mode
197             ################################################################
198             sub stripaccents
199             {
200 1     1 0 12 my $string = shift;
201 1         10 return __PACKAGE__->strip($string);
202             }
203              
204              
205              
206             1;
207