File Coverage

blib/lib/Text/Phonetic.pm
Criterion Covered Total %
statement 81 89 91.0
branch 41 52 78.8
condition 17 27 62.9
subroutine 15 17 88.2
pod 4 6 66.6
total 158 191 82.7


line stmt bran cond sub pod time code
1             # ============================================================================
2             package Text::Phonetic;
3             # ============================================================================
4 10     10   176434 use Moo;
  10         38346  
  10         52  
5 10     10   7843 use utf8;
  10         51  
  10         49  
6              
7 10     10   3244 use Text::Unidecode qw();
  10         11205  
  10         205  
8 10     10   60 use Carp;
  10         19  
  10         487  
9 10     10   2648 use Module::Find;
  10         9966  
  10         584  
10 10     10   2516 use Class::Load;
  10         114688  
  10         673  
11              
12             our $AUTHORITY = 'cpan:MAROS';
13             our $VERSION = "2.09";
14              
15 10     10   176 use 5.008000;
  10         36  
16              
17             our $DEFAULT_ALGORITHM = 'Phonix';
18             our @PREDICATES_CHECKED;
19             our @AVAILABLE_ALGORITHMS = grep
20             { s/^Text::Phonetic::(.+)$/$1/x }
21             findsubmod Text::Phonetic;
22              
23             has 'unidecode' => (
24             is => 'rw',
25             default => 1,
26             required => 1,
27             documentation => q[Transliterate strings to ASCII before processing]
28             );
29              
30             after 'BUILDARGS' => sub {
31             my ($class) = @_;
32             return $class->check_predicates;
33             };
34              
35             # ----------------------------------------------------------------------------
36             # Class methods
37              
38             sub available_algorithms {
39 1     1 1 467 return @AVAILABLE_ALGORITHMS;
40             }
41              
42             sub register_algorithm {
43 0     0 0 0 my ($class,$algorithm) = @_;
44             push @AVAILABLE_ALGORITHMS,$algorithm
45 0 0       0 unless grep { $algorithm eq $_ } @AVAILABLE_ALGORITHMS;
  0         0  
46 0         0 return $algorithm;
47             }
48              
49             sub check_predicates {
50 45     45 0 81 my ($class) = @_;
51              
52 45 100 100     297 if ($class->can('_predicates')
53 28         94 && ! grep { $class eq $_ } @PREDICATES_CHECKED) {
54 9         27 my @predicates = $class->_predicates;
55 9         35 foreach my $predicate (@predicates) {
56 9         21 my $ok = Class::Load::try_load_class($predicate);
57 9 100       1432 unless ($ok) {
58 2         34 croak("Could not load '$class' phonetic algorithm: Predicate '$predicate' is missing")
59             } else {
60 7         22 push(@PREDICATES_CHECKED,$class);
61             }
62             }
63             }
64 43         472 return;
65             }
66              
67             # ----------------------------------------------------------------------------
68             # Constructor (new provided by Moo)
69              
70             sub load {
71 19     19 1 23517 my $self = shift;
72 19 100 66     110 my $params = (scalar @_ == 1 && ref($_[0]) eq 'HASH') ? shift : { @_ };
73              
74 19   66     71 my $algorithm = delete($params->{algorithm}) || $DEFAULT_ALGORITHM;
75 19         55 my $class = __PACKAGE__.'::'.$algorithm;
76              
77 19 50       50 unless (grep { $algorithm eq $_ } @AVAILABLE_ALGORITHMS) {
  125         275  
78 0         0 croak("Could not load '$algorithm' phonetic algorithm: Algorithm not available");
79             }
80              
81 19 100       65 unless (Class::Load::is_class_loaded($class)) {
82 13         691 my ($ok,$error) = Class::Load::try_load_class($class);
83 13 50       1128 unless ($ok) {
84 0         0 croak("Could not load '$algorithm' phonetic algorithm: $error")
85             }
86             }
87              
88 19         366 $class->check_predicates;
89              
90 18         283 return $class->new($params);
91             }
92              
93             # ----------------------------------------------------------------------------
94             # Public methods
95              
96             sub encode {
97 199     199 1 74919 my $self = shift;
98              
99             # Single value
100 199 100       441 if (scalar(@_) == 1) {
    50          
101 197         251 my $string = shift;
102 197 50       825 $string = Text::Unidecode::unidecode($string)
103             if ($self->unidecode);
104             return
105 197 100 100     8992 unless defined $string && $string !~ /^\s*$/;
106 176         518 return $self->_do_encode($string);
107             # Expand list
108             } elsif (scalar(@_) > 1) {
109 2         3 my @result_list;
110 2         5 foreach my $string (@_) {
111 6         11 push @result_list,$self->encode($string);
112             }
113 2 100       10 return wantarray ? @result_list : \@result_list;
114             }
115             # Fallback
116 0         0 return;
117             }
118              
119              
120             sub compare {
121 18     18 1 10537 my ($self,$string1,$string2) = @_;
122              
123 18 50 33     139 return 0 unless defined $string1 && $string1 !~ /^\s*$/;
124 18 100 66     101 return 0 unless defined $string2 && $string2 !~ /^\s*$/;
125              
126             # Extremely rare case ;-)
127 11 100       39 return 100 if ($string1 eq $string2);
128              
129 9 50       37 if ($self->unidecode) {
130 9         31 $string1 = Text::Unidecode::unidecode($string1);
131 9         160 $string2 = Text::Unidecode::unidecode($string2);
132              
133             # Also not very likely, but has to be checked
134 9 100       1361 return 99 if ($string1 eq $string2);
135             }
136              
137 7         24 my $value1 = $self->_do_encode($string1);
138 7         32 my $value2 = $self->_do_encode($string2);
139              
140 7 50 33     56 return 0 unless (defined $value1 && defined $value2);
141              
142 7         27 return $self->_do_compare($self->_do_encode($string1),$self->_do_encode($string2));
143             }
144              
145             sub _do_compare {
146 3     3   14 my ($self,$result1,$result2) = @_;
147              
148 3 100       14 return 50 if ($result1 eq $result2);
149 1         5 return 0;
150             }
151              
152             sub _do_encode {
153 0     0   0 carp('_do_encode is an abstract method!');
154             }
155              
156             # ----------------------------------------------------------------------------
157             # Utility functions
158              
159             sub _is_inlist {
160 1091     1091   2280 my $string = shift;
161 1091 50       1574 return 0 unless defined $string;
162 1091 100 66     2178 my $list = (scalar @_ == 1 && ref($_[0]) eq 'ARRAY') ? shift : \@_;
163              
164 1091 100       1591 return 1 if grep {$string eq $_ } @$list;
  4683         7229  
165 856         2305 return 0;
166             }
167              
168             sub _compare_list {
169 6     6   16 my ($list1,$list2) = @_;
170              
171 6 50 33     45 return 0 unless ref($list1) eq 'ARRAY' && ref($list2) eq 'ARRAY';
172              
173 6         18 foreach my $element1 (@$list1) {
174 8 100       20 next unless defined $element1;
175 7         15 foreach my $element2 (@$list2) {
176 13 100       25 next unless defined $element2;
177 12 100       50 return 1
178             if $element1 eq $element2;
179             }
180             }
181              
182 3         14 return 0;
183             }
184              
185             "Schmitt ~ Smith ~ Schmitz";
186              
187             =encoding utf8
188              
189             =pod
190              
191             =head1 NAME
192              
193             Text::Phonetic - A base class for phonetic algorithms
194              
195             =head1 SYNOPSIS
196              
197             use Text::Phonetic::Metaphone;
198              
199             my $phonetic = Text::Phonetic::Metaphone->new();
200             $encoded_string = $phonetic->encode($string);
201             @encoded_list = $phonetic->encode(@list);
202              
203             my $same = $phonetic->compare($string1,$string2);
204              
205             Or
206              
207             use Text::Phonetic;
208             my $phonetic = Text::Phonetic->load( algorithm => 'Phonix' );
209             $encoded_string = $phonetic->encode($string);
210              
211             This module provides an easy and convinient way to encode names with various
212             phonetic algorithms. It acts as a wrapper around other phonetic algorithm
213             modules like L, L, L
214             and also implements some other algorithms such as
215             L, L,
216             L and L.
217              
218             This module can easily be subclassed.
219              
220             =head1 DESCRIPTION
221              
222             =head2 Constructors
223              
224             =head3 new
225              
226             $obj = Text::Phonetic::SUBCLASS->new(%PARAMETERS)
227              
228             You can pass arbitrary attributes to the constructor. The only global
229             attribute is C which defaults to 1 if not set. This attribute
230             controls if non-latin characters should be transliterated to A-Z
231             (see also L).
232              
233             Additional attributes may be defined by the various implementation classes.
234              
235             =head3 load
236              
237             $obj = Text::Phonetic->load(algorithm => $algorithm, %PARAMETERS)
238              
239             Alternative constructor which also loads the requested algorithm subclass.
240              
241             =head2 Methods
242              
243             =head3 encode
244              
245             $RETURN_STRING = $obj->encode($STRING);
246             OR
247             @RETURN_LIST = $obj->encode(@LIST);
248             OR
249             $RETURN_LIST_REF = $obj->encode(@LIST);
250              
251             Encodes the given string or list of strings. Returns a single value, array or
252             array reference depending on the caller context and parameters.
253              
254             Returns undef on an empty/undefined/whitespace only string.
255              
256             =head3 compare
257              
258             $RETURN_CODE = $obj->compare($STRING1,$STRING2);
259              
260             The return code is an integer between 100 and 0 indicating the likelihood that
261             the to results are the same. 100 means that the strings are completely
262             identical. 99 means that the strings match after all non-latin characters
263             have been transliterated. Values in between 98 and 1 usually mean that the
264             given strings match. 0 means that the used alogorithm couldn't match the two
265             strings at all.
266             C is a shortcut to the C<$obj-E_do_compare($CODE1,$CODE2)> method.
267              
268             =head2 Class Methods
269              
270             =head3 available_algorithms
271              
272             my @available = Text::Phonetic->available_algorithms;
273              
274             Returns a list of all available/installed algorithms
275              
276             =head1 SUBLCASSING
277              
278             You can easily subclass Text::Phonetic and add your own phonetic algorithm.
279             All subclasses must use Text::Phonetic as their base class, reside in
280             the Text::Phonetic namespace, and implement the following methods:
281              
282             =head2 _do_encode
283              
284             $RESULT = $obj->_do_encode($STRING);
285              
286             This method does the actual encoding. It should return either a string or
287             an array reference.
288              
289             =head2 _do_compare
290              
291             $RETURN_STRING = $obj->_do_compare($RESULT1,$RESULT2);
292              
293             If your C<_do_encode> method doesn't return a single scalar value you also
294             might need to implement a comparison method. It takes two results as returned
295             by C<_do_encode> and returns an integer value between 98 and 0
296             (see L<"compare">).
297              
298             =head2 _predicates
299              
300             Third party modules can be marked as predicates by adding the C<_predicates>
301             method which should return al list of package names. All predicates will be
302             loaded if installed. If missing an exception will be thrown.
303              
304             =head2 Object structure
305              
306             Text::Phonetic uses L to declare attributes.
307              
308             =head2 Helper class methods
309              
310             =head3 _compare_list
311              
312             Text::Phonetic::_compare_list($LIST1_REF,$LIST2_REF);
313              
314             Compares the two arrays and returns true if at least one element is equal
315             (ignoring the position) in both lists.
316              
317             =head2 Example class
318              
319             package Text::Phonetic::MyAlgorithm;
320             use Moo;
321             extends qw(Text::Phonetic);
322              
323             has someattribute => (
324             is => 'rw',
325             );
326              
327             sub _do_encode {
328             my ($self,$string) = @_;
329             # Do something
330             return $phonetic_representation;
331             }
332              
333             __PACKAGE__->meta->make_immutable;
334             no Moo;
335             1;
336              
337             =head1 SEE ALSO
338              
339             L (Build phonetic indices via DBIx::Class),
340             L (Phonetic encoding for video game titles)
341              
342             =head1 SUPPORT
343              
344             Please report any bugs or feature requests to C, or
345             through the web interface at
346             L.
347             I will be notified, and then you'll automatically be notified of progress on
348             your report as I make changes.
349              
350             =head1 AUTHOR
351              
352             Maroš Kollár
353             CPAN ID: MAROS
354             maros [at] k-1.com
355              
356             http://www.k-1.com
357              
358             =head1 COPYRIGHT
359              
360             Text::Phonetic is Copyright (c) 2006-2012 Maroš Kollár
361             - L
362              
363             =head1 LICENCE
364              
365             This library is free software, you can redistribute it and/or modify
366             it under the same terms as Perl itself.
367              
368             =cut