File Coverage

blib/lib/Lingua/SoundChange.pm
Criterion Covered Total %
statement 95 104 91.3
branch 32 48 66.6
condition 11 16 68.7
subroutine 12 15 80.0
pod 3 8 37.5
total 153 191 80.1


line stmt bran cond sub pod time code
1             package Lingua::SoundChange;
2              
3 4     4   101155 use 5.005_03;
  4         14  
  4         185  
4 4     4   22 use strict;
  4         7  
  4         142  
5 4     4   26 use Carp;
  4         11  
  4         413  
6             # use warnings;
7              
8             # for debugging:
9 4   50 4   20 use constant PRINT_RULES => $ENV{LINGUA_SOUNDCHANGE_PRINTRULES} || 0;
  4         8  
  4         355  
10 4     4   19 use constant DEBUG => 0;
  4         8  
  4         166  
11 4     4   21 use vars qw($VERSION);
  4         7  
  4         6870  
12              
13             $VERSION = '0.05';
14              
15             sub compile_rules($$$$);
16             sub compile_vars($$);
17              
18             sub new {
19 22     22 1 20944 my($class, $vars, $rules, $opts) = @_;
20              
21 22   100     109 $opts ||= { };
22              
23 22 50       88 croak '$vars must be a hash reference!' unless ref $vars eq 'HASH';
24 22 50       68 croak '$rules must be an array reference!' unless ref $rules eq 'ARRAY';
25              
26 22         95 my $obj = {
27             raw_vars => $vars,
28             raw_rules => $rules,
29             opts => $opts,
30             };
31              
32 22         66 $obj->{vars} = compile_vars($vars, $opts);
33 22 100       69 if($opts->{longVars}) {
34 6         47 $obj->{raw_vars}->{"\Q<$_>\E"} = $vars->{$_} for keys %$vars;
35             }
36 22         181 ( $obj->{rules},
37             $obj->{code} ) = compile_rules($rules, $obj->{vars}, $obj->{raw_vars}, $opts);
38              
39 22         112 bless $obj, $class;
40             }
41              
42             sub change {
43 22     22 1 14483 my($self, $words) = @_;
44              
45 22 50       69 $words = [] unless defined $words;
46              
47 22 50       682 croak("change needs an array reference") unless ref($words) eq 'ARRAY';
48              
49 22         48 foreach my $word (@$words) {
50 54         77 my $origword = $word;
51 54         64 my @rules;
52             my $ruleout;
53 54         65 foreach my $rule (@{$self->rules}) {
  54         131  
54 168         5064 ($word, $ruleout) = $rule->($word);
55 168 100       573 push @rules, $ruleout if defined $ruleout;
56             }
57              
58 54         321 $word = { orig => $origword, word => $word, rules => \@rules };
59             }
60              
61 22         87 $words;
62             }
63              
64              
65             # Private methods
66              
67             sub compile_rules ($$$$) {
68 22     22 0 49 my($rules, $vars, $varstring, $opt) = @_;
69              
70 22 50       67 croak "rules not an array reference" unless ref $rules eq 'ARRAY';
71 22 50       65 croak "vars not a hash reference" unless ref $vars eq 'HASH';
72 22 50       55 croak "varstring not a hash reference" unless ref $varstring eq 'HASH';
73 22 50       56 croak "opt not a hash reference" unless ref $opt eq 'HASH';
74              
75 22         35 my @compiledrules;
76             my %code;
77              
78             # Rules: change from a sound (one or more letters) or a category
79             # to another sound or category, in a certain environment.
80             # Categories may only be one letter long and are usually uppercase.
81             # Environments must contain a _ symbol to show where the replacement
82             # takes place; it may also contain letters, categories, and the special
83             # symbols ( ) (to enclose optional parts) and # (beginning or end of
84             # word).
85             # Rules can only change sounds to sounds, and categories to categories.
86             # If a category is to be changed to another category, they should be
87             # the same length. Otherwise the second category will have its laster
88             # letter repeated until it has the same length as the first (if it is
89             # shorter), or characters in the second category that don't match
90             # characters in the first will not be produced by such a range. Note
91             # that this is an artefact of the use of tr/// and is not guaranteed
92             # behaviour.
93             # Don't use regex metacharacters (except for the parentheses which
94             # show optional elements) in the environment or in names of categories
95             # or sounds.
96             # These include: . * + ? ^ $ [ ]
97              
98 22         50 foreach my $rule (@$rules) {
99 40 50       283 if( $rule =~ m{
100             ^
101             ( [^/]+ ) # "change from" to $1
102             / # slash
103             ( [^/]* ) # "change to" to $2 (may be blank)
104             / # slash
105             ( .+ ) # "environment" to $3 (may not be blank)
106             $
107             }x )
108             {
109 40         186 my($from, $to, $env) = ($1, $2, $3);
110 40         76 my($subfrom, $subto) = ('', '');
111              
112             my $option = sub {
113 80     80   128 my $word = shift;
114              
115             # Change parentheses round one element to a question
116             # mark following it, ...
117 80         122 $word =~ s{
118             \( # open parenthesis
119             (.) # one character, to $1
120             \) # close parenthesis
121             }{
122 0         0 $1 . '?'
123             }gex;
124              
125             # ... and parentheses around multiple elements to
126             # non-capturing parentheses followed by a question mark
127             # (but nested parentheses are not allowed!)
128 80         97 $word =~ s{
129             \( # open parenthesis
130             ( [^()]+ ) # one or more non-parenthesis characters,
131             # to $1
132             \) # close parenthesis
133             }{
134 0         0 '(?:' . $1 . ')?'
135             }gex;
136              
137 80         189 $word;
138 40         260 };
139              
140             # Escape dollar signs and at signs in $env, which would
141             # otherwise try to interpolate a variable into the regular
142             # expression
143 40         86 $env =~ s/([\$\@])/\\$1/g;
144              
145             # Get the bits before and after the underscore
146             # and put them in capturing parentheses in $subfrom
147 40 50       195 if($env =~ /^(#?)([^_#]*)(_)([^_#]*)(#?)$/) {
148             # leading #
149 40 100 66     226 $subfrom .= '^' if defined $1 && length $1;
150              
151             # preceding stuff, so we don't need $`
152             # non-greedy
153             # If match anchored at beginning, don't add this
154 40 100 66     214 if(defined $1 && length $1) {
155 1         2 $subfrom .= '()';
156             } else {
157 39         66 $subfrom .= '(.*?)';
158             }
159              
160             # pre-environment
161 40         146 $subfrom .= '(' . $option->(quotemeta $2) . ')';
162              
163             # underscore position: the "change from" sound is captured here
164 40         639 $subfrom .= "(\Q$from\E)";
165              
166             # post-environment
167 40         112 $subfrom .= '(' . $option->(quotemeta $4) . ')';
168              
169             # trailing #
170 40 100 66     229 $subfrom .= '$' if defined $5 && length $5;
171             }
172              
173             # Now expand categories
174 40 100       120 if($opt->{longVars}) {
175 9 50       53 $subfrom =~ s{(\\<[^>]+\\>)}{$vars->{$1} || $1}eg;
  11         64  
176             } else {
177 31 100       153 $subfrom =~ s{(\\?)(.)}{$vars->{$2} || $1 . $2}eg;
  396         2036  
178             }
179              
180             # Show where the rule matches
181 40         135 $subto .= qq(\$rule = "\Q$from\E->\Q$to\E /\Q$env\E applies to \$word at " . (length(\$1)+1) . "\\n"; );
182              
183 40         55 $subto .= '$1 . $2 . ';
184 40 100 66     151 $subto .= ($vars->{quotemeta $from}
185             ? "do { my \$char = \$3; \$char =~ tr{$varstring->{quotemeta $from}}{" . ($varstring->{quotemeta $to} || $to) . "}; \$char }"
186             : "q{" . $to . "}");
187 40         53 $subto .= ' . $4';
188              
189 40         649 if(PRINT_RULES) {
190             print "[", $#compiledrules + 1, "] $rule --> s{$subfrom}{$subto}eg\n";
191             }
192              
193 40         246 my $code = <<"EOF";
194             sub {
195             my \$word = shift;
196             my \$rule;
197             my \$orig;
198             # print qq(Working on '\$word'; \Q$from\E->\Q$to\E /\Q$env\E; from is '\Q$subfrom\E and to is '\Q$subto\E'\\n);
199             1 while ((\$orig = \$word) =~ s{$subfrom}{$subto}e) && (\$orig ne \$word) && (\$word = \$orig);
200             return ( \$word, \$rule );
201             }
202             EOF
203              
204 40         8649 push @compiledrules, eval $code;
205 40 50       131 croak "Problem with '$code'" unless $compiledrules[-1];
206 40         310 $code{$compiledrules[-1]} = $code;
207             }
208             }
209              
210 22         140 ( \@compiledrules, \%code );
211             }
212              
213             sub compile_vars ($$) {
214 22     22 0 44 my($vars, $opt) = @_;
215              
216 22 50       73 croak "\$vars is not a hash reference" unless ref $vars eq 'HASH';
217 22 50       71 croak "\$opt is not a hash reference" unless ref $opt eq 'HASH';
218              
219 22         28 my %compiledvars;
220              
221 22         94 foreach my $var (keys %$vars) {
222 26         46 my $list = $vars->{$var};
223             # Escape at signs and dollars in the list
224 26         49 $list =~ s/([\$\@])/\\$1/g;
225              
226 26 100       62 if($opt->{longVars}) {
227 12         226 $compiledvars{"\Q<$var>\E"} = qr/[$list]/;
228 12         34 print qq[($var => $list // $compiledvars{"\Q<$var>\E"})\n] if PRINT_RULES;
229             } else {
230 14         249 $compiledvars{$var} = qr/[$list]/;
231 14         42 print "($var => $list // $compiledvars{$var})\n" if PRINT_RULES;
232             }
233             }
234              
235 22         84 \%compiledvars;
236             }
237              
238             sub rules {
239 54     54 1 79 my($self) = @_;
240              
241 54         181 $self->{rules};
242             }
243              
244             sub vars {
245 0     0 0   my($self) = @_;
246              
247 0           $self->{vars};
248             }
249              
250             sub varstring {
251 0     0 0   my($self) = @_;
252              
253 0           $self->{raw_vars};
254             }
255              
256             sub code {
257 0     0 0   my($self, $token, $code) = @_;
258              
259 0 0         $self->{code}->{$token} = $code if $code;
260              
261 0           $self->{code}->{$token};
262             }
263              
264              
265             1;
266             __END__