File Coverage

blib/lib/WordLists/Pronounce/Pronouncer.pm
Criterion Covered Total %
statement 61 75 81.3
branch 10 24 41.6
condition 3 9 33.3
subroutine 10 12 83.3
pod 0 4 0.0
total 84 124 67.7


line stmt bran cond sub pod time code
1             package WordLists::Pronounce::Pronouncer;
2 1     1   32766 use strict;
  1         2  
  1         42  
3 1     1   6 use warnings;
  1         2  
  1         35  
4 1     1   6 use utf8;
  1         2  
  1         9  
5 1     1   854 use WordLists::Base;
  1         3  
  1         746  
6             our $VERSION = $WordLists::Base::VERSION;
7            
8             sub new
9             {
10 1     1 0 15 my ($class, $args) = @_;
11 1 50       5 $args = {} unless defined $args;
12 0     0   0 my $self = {
13             cleaner => \&_default_clean,
14             cb_word => undef,
15             cb_phrase => undef,
16 0         0 cb_fail => sub{warn "$_[1] not found\n"; return 'X';},
17 1         7 %{$args}
  1         15  
18             };
19 1 50       5 unless (defined $self->{'lookup'})
20             {
21 0         0 warn ('No lookup set for '.$class);
22            
23             #
24             }
25 1         12 bless $self, $class;
26             }
27             sub lookup
28             {
29 4     4 0 6 my ($self, $new) = @_;
30 4 50       8 $self->{'lookup'} = $new if defined $new;
31 4         10 return $self->{'lookup'};
32             }
33             sub clean
34             {
35 4     4 0 6 my ($self, $word, $args) = @_;
36 4         6 &{$self->{'cleaner'}}($self,$word,$args);
  4         11  
37             }
38             sub _default_clean
39             {
40 4     4   13 my ($self, $word, $args) = @_;
41 4         9 $word =~ s/\([^)]*\)//g;
42 4         7 $word =~ s/^\*//;
43 4         8 $word =~ s/^\s+//;
44 4         10 $word =~ s/\s+$//;
45 4         11 $word =~ s/[’']s\b//s;
46 4         7 $word =~ s/s'\b/s/;
47 1     1   7 $word =~ tr{/’.?",…+;:’‘}{ }d;
  1         3  
  1         15  
  4         27  
48 4         11 $word =~ s/\s+/ /g;
49 4         15 $word =~ s/^'//;
50 4         7 $word =~ s/'$//;
51 4         9 $word = lc $word;
52 4         11 return $word;
53             }
54             sub pronounce_phrase
55             {
56 4     4 0 9 my ($self, $word, $args) = @_;
57 4         7 my $field = $args->{'field'};
58 4   33     11 $field ||= $self->{'field'};
59 4 50       8 warn ('Source field must be defined!') unless ($field);
60 4         9 my $lookup = $self->lookup;
61 4         5 my $ipa="";
62 4         10 $word = $self->clean($word, $args);
63 4         20 my @senses = grep {$_->get($field)} $lookup->get_senses_for($word);
  5         17  
64 4 50       17 return "" if ($word eq "");
65 4         7 my @ipaout = ();
66 4 100       9 if (@senses) # if the whole input string matches
67             {
68 3         9 $ipa = $senses[0]->get($field);
69 3         6 push @ipaout, $ipa;
70             }
71             else # if the whole input string does not match, try splitting and pronouncing each component
72             {
73 1         7 my @words = split(/[- ?]+/, $word);
74 1 50       4 if ($#words > 0)
75             {
76 1         4 foreach (@words)
77             {
78 2         19 push @ipaout, $self->pronounce_phrase($_, {%$args,(as_word=>1)});
79             }
80             }
81             else # if non-matching input string is a single word, fail.
82             {
83 0 0       0 if (defined $self->{'cb_fail'} )
84             {
85 0 0   0   0 if (ref ($self->{'cb_fail'}) eq ref (sub {}))
  0 0       0  
86             {
87 0         0 return &{$self->{'cb_fail'}}($self, $word, $args);
  0         0  
88             }
89             elsif (ref ($self->{'cb_fail'}) eq ref (''))
90             {
91 0         0 return $self->{'cb_fail'};
92             }
93             else
94             {
95 0         0 return '';
96             # Don't return undef, as this will probably break string concatenation.
97             }
98             }
99             }
100             }
101 4         12 my $out = join(" ", @ipaout) ;
102 4 50 33     25 if (defined $self->{'cb_word'} and $args->{'as_word'}) # Callback for processing string as a word
    50 33        
103             {
104 0         0 $out = &{$self->{'cb_word'}}($self, $out, $args);
  0         0  
105             }
106             elsif (defined $self->{'cb_phrase'} and !$args->{'as_word'}) # Callback for processing string as a phrase
107             {
108 0         0 $out = &{$self->{'cb_phrase'}}($self, $out, $args) ;
  0         0  
109             }
110 4         21 return $out;
111             }
112            
113             return 1;
114            
115             =pod
116            
117             =head1 NAME
118            
119             WordLists::Pronounce::Pronouncer
120            
121             =head1 SYNOPSIS
122            
123             my $pronouncer = WordLists::Pronounce::Pronouncer->new({ lookup => $here_is_one_i_prepared_earlier });
124             $ipa = $pronouncer->pronounce_phrase('tomato stew', {field=>'uspron'});
125            
126             =head1 DESCRIPTION
127            
128             Allows the user to create and configure a pronouncing object which will accept strings and have a guess at their IPA transcription, based on a pre-generated list of pronunciations.
129            
130             The user must specify a lookup object containing this list.
131            
132             =head1 TODO
133            
134             Write and implement an API for reading several pronunciations and comparing them (possibly once the whole phrase has been put together).
135            
136             Try to guess stress for compounds based on a list of compounds and/or at least making sure each word has stress. (This might be impossible to do meaningfully)
137            
138             =head1 BUGS
139            
140             Please use the Github issues tracker.
141            
142             =head1 LICENSE
143            
144             Copyright 2011-2012 © Cambridge University Press. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
145            
146             =cut