File Coverage

blib/lib/Text/Phonetic.pm
Criterion Covered Total %
statement 81 89 91.0
branch 41 52 78.8
condition 17 27 62.9
subroutine 15 17 88.2
pod 4 6 66.6
total 158 191 82.7


line stmt bran cond sub pod time code
1             # ============================================================================
2             package Text::Phonetic;
3             # ============================================================================
4 10     10   111162 use Moo;
  10         45807  
  10         42  
5 10     10   7758 use utf8;
  10         32  
  10         42  
6              
7 10     10   4629 use Text::Unidecode qw();
  10         8969  
  10         189  
8 10     10   43 use Carp;
  10         12  
  10         477  
9 10     10   4093 use Module::Find;
  10         8734  
  10         501  
10 10     10   3859 use Class::Load;
  10         107046  
  10         540  
11              
12             our $AUTHORITY = 'cpan:MAROS';
13             our $VERSION = "2.07";
14              
15 10     10   136 use 5.008000;
  10         22  
16              
17             our $DEFAULT_ALGORITHM = 'Phonix';
18             our @PREDICATES_CHECKED;
19             our @AVAILABLE_ALGORITHMS = grep { s/^Text::Phonetic::(.+)$/$1/x }
20             findsubmod Text::Phonetic;
21              
22             has 'unidecode' => (
23             is => 'rw',
24             default => 1,
25             required => 1,
26             documentation => q[Transliterate strings to ASCII before processing]
27             );
28              
29             after 'BUILDARGS' => sub {
30             my ($class) = @_;
31             return $class->check_predicates;
32             };
33              
34             __PACKAGE__->meta->make_immutable;
35              
36             # ----------------------------------------------------------------------------
37             # Class methods
38              
39             sub available_algorithms {
40 1     1 1 559 return @AVAILABLE_ALGORITHMS;
41             }
42              
43             sub register_algorithm {
44 0     0 0 0 my ($class,$algorithm) = @_;
45             push @AVAILABLE_ALGORITHMS,$algorithm
46 0 0       0 unless grep { $algorithm eq $_ } @AVAILABLE_ALGORITHMS;
  0         0  
47 0         0 return $algorithm;
48             }
49              
50             sub check_predicates {
51 45     45 0 52 my ($class) = @_;
52            
53 45 100 100     254 if ($class->can('_predicates')
54 28         75 && ! grep { $class eq $_ } @PREDICATES_CHECKED) {
55 9         21 my @predicates = $class->_predicates;
56 9         18 foreach my $predicate (@predicates) {
57 9         37 my $ok = Class::Load::try_load_class($predicate);
58 9 100       1250 unless ($ok) {
59 2         36 croak("Could not load '$class' phonetic algorithm: Predicate '$predicate' is missing")
60             } else {
61 7         17 push(@PREDICATES_CHECKED,$class);
62             }
63             }
64             }
65 43         400 return;
66             }
67              
68             # ----------------------------------------------------------------------------
69             # Constructor (new provided by Moo)
70              
71             sub load {
72 19     19 1 20706 my $self = shift;
73 19 100 66     88 my $params = (scalar @_ == 1 && ref($_[0]) eq 'HASH') ? shift : { @_ };
74            
75 19   66     52 my $algorithm = delete($params->{algorithm}) || $DEFAULT_ALGORITHM;
76 19         36 my $class = __PACKAGE__.'::'.$algorithm;
77            
78 19 50       26 unless (grep { $algorithm eq $_ } @AVAILABLE_ALGORITHMS) {
  125         148  
79 0         0 croak("Could not load '$algorithm' phonetic algorithm: Algorithm not available");
80             }
81            
82 19 100       43 unless (Class::Load::is_class_loaded($class)) {
83 13         505 my ($ok,$error) = Class::Load::try_load_class($class);
84 13 50       904 unless ($ok) {
85 0         0 croak("Could not load '$algorithm' phonetic algorithm: $error")
86             }
87             }
88            
89 19         258 $class->check_predicates;
90            
91 18         250 return $class->new($params);
92             }
93              
94             # ----------------------------------------------------------------------------
95             # Public methods
96              
97             sub encode {
98 134     134 1 34028 my $self = shift;
99            
100             # Single value
101 134 100       247 if (scalar(@_) == 1) {
    50          
102 132         109 my $string = shift;
103 132 50       541 $string = Text::Unidecode::unidecode($string)
104             if ($self->unidecode);
105             return
106 132 100 100     6812 unless defined $string && $string !~ /^\s*$/;
107 111         253 return $self->_do_encode($string);
108             # Expand list
109             } elsif (scalar(@_) > 1) {
110 2         1 my @result_list;
111 2         4 foreach my $string (@_) {
112 6         9 push @result_list,$self->encode($string);
113             }
114 2 100       7 return wantarray ? @result_list : \@result_list;
115             }
116             # Fallback
117 0         0 return;
118             }
119              
120              
121             sub compare {
122 18     18 1 5235 my ($self,$string1,$string2) = @_;
123              
124 18 50 33     112 return 0 unless defined $string1 && $string1 !~ /^\s*$/;
125 18 100 66     92 return 0 unless defined $string2 && $string2 !~ /^\s*$/;
126              
127             # Extremely rare case ;-)
128 11 100       26 return 100 if ($string1 eq $string2);
129              
130 9 50       33 if ($self->unidecode) {
131 9         21 $string1 = Text::Unidecode::unidecode($string1);
132 9         122 $string2 = Text::Unidecode::unidecode($string2);
133            
134             # Also not very likely, but has to be checked
135 9 100       1238 return 99 if ($string1 eq $string2);
136             }
137            
138 7         18 my $value1 = $self->_do_encode($string1);
139 7         23 my $value2 = $self->_do_encode($string2);
140            
141 7 50 33     36 return 0 unless (defined $value1 && defined $value2);
142            
143 7         16 return $self->_do_compare($self->_do_encode($string1),$self->_do_encode($string2));
144             }
145            
146             sub _do_compare {
147 3     3   8 my ($self,$result1,$result2) = @_;
148            
149 3 100       16 return 50 if ($result1 eq $result2);
150 1         4 return 0;
151             }
152              
153             sub _do_encode {
154 0     0   0 carp('_do_encode is an abstract method!');
155             }
156              
157             # ----------------------------------------------------------------------------
158             # Utility functions
159              
160             sub _is_inlist {
161 608     608   1329 my $string = shift;
162 608 50       700 return 0 unless defined $string;
163 608 100 66     993 my $list = (scalar @_ == 1 && ref($_[0]) eq 'ARRAY') ? shift : \@_;
164            
165 608 100       525 return 1 if grep {$string eq $_ } @$list;
  1930         2149  
166 527         987 return 0;
167             }
168              
169             sub _compare_list {
170 6     6   9 my ($list1,$list2) = @_;
171              
172 6 50 33     39 return 0 unless ref($list1) eq 'ARRAY' && ref($list2) eq 'ARRAY';
173              
174 6         10 foreach my $element1 (@$list1) {
175 8 100       13 next unless defined $element1;
176 7         10 foreach my $element2 (@$list2) {
177 13 100       19 next unless defined $element2;
178 12 100       38 return 1
179             if $element1 eq $element2;
180             }
181             }
182            
183 3         8 return 0;
184             }
185              
186             "Schmitt ~ Smith ~ Schmitz";
187              
188             =encoding utf8
189              
190             =pod
191              
192             =head1 NAME
193              
194             Text::Phonetic - A base class for phonetic algorithms
195              
196             =head1 SYNOPSIS
197              
198             use Text::Phonetic::Metaphone;
199            
200             my $phonetic = Text::Phonetic::Metaphone->new();
201             $encoded_string = $phonetic->encode($string);
202             @encoded_list = $phonetic->encode(@list);
203            
204             my $same = $phonetic->compare($string1,$string2);
205              
206             Or
207              
208             use Text::Phonetic;
209             my $phonetic = Text::Phonetic->load( algorithm => 'Phonix' );
210             $encoded_string = $phonetic->encode($string);
211              
212             This module provides an easy and convinient way to encode names with various
213             phonetic algorithms. It acts as a wrapper around other phonetic algorithm
214             modules like L, L, L
215             and also implements some other algorithms such as
216             L, L,
217             L and L.
218              
219             This module can easily be subclassed.
220              
221             =head1 DESCRIPTION
222              
223             =head2 Constructors
224              
225             =head3 new
226              
227             $obj = Text::Phonetic::SUBCLASS->new(%PARAMETERS)
228            
229             You can pass arbitrary attributes to the constructor. The only global
230             attribute is C which defaults to 1 if not set. This attribute
231             controls if non-latin characters should be transliterated to A-Z
232             (see also L).
233              
234             Additional attributes may be defined by the various implementation classes.
235              
236             =head3 load
237              
238             $obj = Text::Phonetic->load(algorithm => $algorithm, %PARAMETERS)
239              
240             Alternative constructor which also loads the requested algorithm subclass.
241              
242             =head2 Methods
243              
244             =head3 encode
245              
246             $RETURN_STRING = $obj->encode($STRING);
247             OR
248             @RETURN_LIST = $obj->encode(@LIST);
249             OR
250             $RETURN_LIST_REF = $obj->encode(@LIST);
251            
252             Encodes the given string or list of strings. Returns a single value, array or
253             array reference depending on the caller context and parameters.
254              
255             Returns undef on an empty/undefined/whitespace only string.
256              
257             =head3 compare
258              
259             $RETURN_CODE = $obj->compare($STRING1,$STRING2);
260            
261             The return code is an integer between 100 and 0 indicating the likelihood that
262             the to results are the same. 100 means that the strings are completely
263             identical. 99 means that the strings match after all non-latin characters
264             have been transliterated. Values in between 98 and 1 usually mean that the
265             given strings match. 0 means that the used alogorithm couldn't match the two
266             strings at all.
267             C is a shortcut to the C<$obj-E_do_compare($CODE1,$CODE2)> method.
268              
269             =head2 Class Methods
270              
271             =head3 available_algorithms
272              
273             my @available = Text::Phonetic->available_algorithms;
274              
275             Returns a list of all available/installed algorithms
276              
277             =head1 SUBLCASSING
278              
279             You can easily subclass Text::Phonetic and add your own phonetic algorithm.
280             All subclasses must use Text::Phonetic as their base class, reside in
281             the Text::Phonetic namespace, and implement the following methods:
282              
283             =head2 _do_encode
284              
285             $RESULT = $obj->_do_encode($STRING);
286              
287             This method does the actual encoding. It should return either a string or
288             an array reference.
289              
290             =head2 _do_compare
291              
292             $RETURN_STRING = $obj->_do_compare($RESULT1,$RESULT2);
293            
294             If your C<_do_encode> method doesn't return a single scalar value you also
295             might need to implement a comparison method. It takes two results as returned
296             by C<_do_encode> and returns an integer value between 98 and 0
297             (see L<"compare">).
298              
299             =head2 _predicates
300              
301             Third party modules can be marked as predicates by adding the C<_predicates>
302             method which should return al list of package names. All predicates will be
303             loaded if installed. If missing an exception will be thrown.
304              
305             =head2 Object structure
306              
307             Text::Phonetic uses L to declare attributes.
308              
309             =head2 Helper class methods
310              
311             =head3 _compare_list
312              
313             Text::Phonetic::_compare_list($LIST1_REF,$LIST2_REF);
314              
315             Compares the two arrays and returns true if at least one element is equal
316             (ignoring the position) in both lists.
317              
318             =head2 Example class
319              
320             package Text::Phonetic::MyAlgorithm;
321             use Moo;
322             extends qw(Text::Phonetic);
323            
324             has someattribute => (
325             is => 'rw',
326             );
327            
328             sub _do_encode {
329             my ($self,$string) = @_;
330             # Do something
331             return $phonetic_representation;
332             }
333            
334             __PACKAGE__->meta->make_immutable;
335             no Moo;
336             1;
337              
338             =head1 SEE ALSO
339              
340             L (Build phonetic indices via DBIx::Class),
341             L (Phonetic encoding for video game titles)
342              
343             =head1 SUPPORT
344              
345             Please report any bugs or feature requests to C, or
346             through the web interface at
347             L.
348             I will be notified, and then you'll automatically be notified of progress on
349             your report as I make changes.
350              
351             =head1 AUTHOR
352              
353             Maroš Kollár
354             CPAN ID: MAROS
355             maros [at] k-1.com
356            
357             http://www.k-1.com
358              
359             =head1 COPYRIGHT
360              
361             Text::Phonetic is Copyright (c) 2006-2012 Maroš Kollár
362             - L
363              
364             =head1 LICENCE
365              
366             This library is free software, you can redistribute it and/or modify
367             it under the same terms as Perl itself.
368              
369             =cut