File Coverage

blib/lib/Lingua/Phonology/Rules.pm

Criterion	Covered	Total	%
statement	13	15	86.6
branch			n/a
condition			n/a
subroutine	5	5	100.0
pod			n/a
total	18	20	90.0

line	stmt	sub	time	code
1				#!/usr/bin/perl
2
3				package Lingua::Phonology::Rules;
4
5				=head1 NAME
6
7				Lingua::Phonology::Rules - a module for defining and applying
8				phonological rules.
9
10				=head1 SYNOPSIS
11
12				use Lingua::Phonology;
13				$phono = new Lingua::Phonology;
14
15				$rules = $phono->rules;
16
17				# Adding and manipulating rules is discussed in the "WRITING RULES"
18				# section
19
20				=head1 DESCRIPTION
21
22				This module allows for the creation of linguistic rules, and the application of
23				those rules to "words" of Segment objects. You, the user, add rules to a Rules
24				object, defining various parameters and code references that actually perform
25				the action of the rule. Lingua::Phonology::Rules will take care of the guts of
26				applying and creating rules.
27
28				The rules you create may have the following parameters. This is just a brief
29				description of the parameters--a more detailed discussion of their effect is in
30				the L<"WRITING RULES"> section.
31
32				=over 4
33
34				=item * domain
35
36				Defines the domain within which the rule applies. This should be
37				the name of a feature in the featureset of the segments which the rule is
38				applied to.
39
40				=item * tier
41
42				Defines the tier on which the rule applies. Must be the name of a
43				feature in the feature set for the segments of the word you pass in.
44
45				=item * direction
46
47				Defines the direction that the rule applies in. Must be
48				either 'leftward' or 'rightward.' If no direction is given, defaults to
49				'rightward'.
50
51				=item * filter
52
53				Defines a filter for the segments that the rule applies on.
54				Must a code reference that returns a truth value.
55
56				=item * linguistic
57
58				Defines a linguistic-style rule to be parsed. When you provide a
59				linguistic-style rule, it is parsed into code references that take the place of
60				the C and C properties listed below. The format of linguistic rules
61				is described in L.
62
63				=item *
64
65				B - defines the condition or conditions where the rule applies. Must be a
66				coderef that returns a truth value. If no value is given, defaults to
67				always true.
68
69				=item *
70
71				B - defines the action to take when the C condition is met. Must be
72				a code reference. If no value is given, does nothing.
73
74				=back
75
76				Lingua::Phonology::Rules is flexible and powerful enough to handle any
77				sequential type of rule system. It cannot handle Optimality Theory-style
78				processes, because those require a fundamentally different kind of
79				algorithm.
80
81				=cut
82
83	1	1	25404	use strict;
	1		4
	1		38
84	1	1	5	use warnings;
	1		3
	1		37
85	1	1	6	use warnings::register;
	1		2
	1		181
86	1	1	6	use Carp;
	1		2
	1		95
87	1	1	633	use Lingua::Phonology::Common;
	0
	0
88				use Lingua::Phonology::Word;
89				use Lingua::Phonology::Segment::Rules;
90				use Lingua::Phonology::Segment::Boundary;
91				use Lingua::Phonology::Segment::Tier;
92
93				our $VERSION = 0.3;
94
95				sub err ($) { _err($_[0]) if warnings::enabled() };
96
97				# This variable is created the first time someone tries to parse a lingustic
98				# rule, and reused thereafter
99				our $PARSER;
100
101				# Define valid properties for rules, name => default format. undef's for no default
102				our %property = (
103				where => sub {1},
104				do => sub {},
105				tier => undef,
106				filter => undef,
107				result => undef,
108				domain => undef,
109				direction => 'rightward'
110				);
111
112				# Hash of property => validating coderef
113				our %valid = (
114				where => sub { _is($_[0], 'CODE') },
115				do => sub { _is($_[0], 'CODE') },
116				result => sub { _is($_[0], 'CODE') },
117				filter => sub { _is $_[0], 'CODE' },
118				tier => sub {1},
119				domain => sub {1},
120				direction => sub { $_[0] = lc $_[0]; $_[0] eq 'rightward' \|\| $_[0] eq 'leftward' }
121				);
122
123				# List of properties passed on to Lingua::Phonology::Word
124				our %worder = (
125				filter => undef,
126				tier => undef,
127				domain => undef,
128				direction => undef
129				);
130
131				for my $method (keys %worder) {
132				no strict 'refs';
133				*$method = sub {
134				my $self = shift;
135				my $rule = shift;
136				return err "No such rule '$rule'" unless exists $self->{RULES}->{$rule};
137				$self->{RULES}->{$rule}->{word}->$method(@_);
138				}
139				}
140
141				# Additional arrays that list properties expecting code and text respectively
142				our @code = qw/where do filter result/;
143				our @text = qw/tier domain direction/;
144
145				# Build accessors for properties
146				foreach my $method (keys %valid) {
147				next if exists $worder{$method};
148				no strict 'refs';
149				*$method = sub {
150				my $self = shift;
151				my $rule = shift;
152				return err "No such rule '$rule'" unless exists $self->{RULES}->{$rule};
153				if (@_) {
154				# When defined, check for validity and add
155				if (defined $_[0]) {
156				if ($valid{$method}->($_[0])) {
157				$self->{RULES}->{$rule}->{$method} = $_[0];
158				}
159				else {
160				return err "Bad argument to $method()";
161				}
162				}
163				# Otherwise, delete the key
164				else {
165				delete $self->{RULES}->{$rule}->{$method};
166				}
167				}
168				return $self->{RULES}->{$rule}->{$method};
169				};
170				}
171
172				# Constructor
173				sub new {
174				my $proto = shift;
175				my $class = ref($proto) \|\| $proto;
176				my $self = {
177				RULES => { }, # list of rules
178				ORDER => [ ], # rule order
179				PERSIST => [ ], # list of persistent rules
180				COUNT => 0, # count of times a rule applied, set by apply or apply_all
181				};
182				bless ($self, $class);
183				return $self;
184				}
185
186				# Add a rule. Called as $rules->add_rule( Name => { ... } );
187				sub add_rule {
188				my ($self, %rules) = @_;
189				my $err = 0;
190
191				RULE: for my $rule (keys(%rules)) {
192				# Check rules or complain
193				$self->_check_rule($rules{$rule}, $rule) or do {
194				$err = 1;
195				next RULE;
196				};
197
198				# Drop existing rules
199				$self->drop_rule($rule);
200
201				# Add new rules
202				$self->_add_rule($rule, $rules{$rule});
203
204				}
205				return $err ? () : 1;
206				}
207
208				# Drop a rule
209				sub drop_rule {
210				my $self = shift;
211				delete $self->{RULES}->{$_} for @_;
212				return scalar @_;
213				}
214
215				# Like add_rule, but check that the rule exists
216				sub change_rule {
217				my ($self, %rules) = @_;
218				my $err = 0;
219
220				RULE: for my $rule (keys %rules) {
221				# Complain when the rule doesn't exist
222				if (not exists $self->{RULES}->{$rule}) {
223				err("No such rule '$rule'");
224				$err = 1;
225				next RULE;
226				}
227
228				# Check rules
229				$self->_check_rule($rules{$rule}, $rule) or do {
230				$err = 1;
231				next RULE;
232				};
233
234				# Add rules
235				$self->_add_rule($rule, $rules{$rule});
236				}
237				return $err ? () : 1;
238				}
239
240				sub _check_rule {
241				my ($self, $href, $name) = @_;
242				# Parse ling rules
243				if (exists $href->{linguistic}) {
244				($href->{where}, $href->{do}) = _parse_ling($href->{linguistic});
245				unless ($href->{where} && $href->{do}) {
246				return err "Couldn't parse linguistic rule for '$name'";
247				}
248				$href->{where} = _parse_ext($href->{where});
249				$href->{do} = _parse_ext($href->{do});
250				}
251
252				# Validate keys
253				for (keys %$href) {
254				if (exists $valid{$_}) {
255				unless ($valid{$_}->($href->{$_})) {
256				return err("Invalid value for $_ in rule '$name'");
257				}
258				}
259				}
260				return 1;
261				}
262
263				sub _add_rule {
264				my ($self, $rule, $href) = @_;
265
266				$self->{RULES}->{$rule} = {};
267				$self->{RULES}->{$rule}->{word} = Lingua::Phonology::Word->new();
268				for (keys %property) {
269				$self->$_($rule, $href->{$_} \|\| $property{$_});
270				}
271				}
272
273				sub clear {
274				my $self = shift;
275				$self->{RULES} = {};
276				$self->{ORDER} = [];
277				$self->{PERSIST} = [];
278				return 1;
279				}
280
281				sub loadfile {
282				my ($self, $file) = @_;
283
284				# Calling loadfile() w/o a file loads a default, but there is no default
285				# rule set. So do nothing and return true.
286				return 1 if not defined $file;
287
288				my $parse;
289				eval { $parse = _parse_from_file($file, 'rules') };
290				return err($@) if $@;
291				$self->_load_from_struct($parse);
292				}
293
294				sub _load_from_struct {
295				my ($self, $parse) = @_;
296				my $err = 0;
297
298				# Handle rule declarations
299				RULE: for my $href (@{$parse->{rule}}) {
300				my $parm = {};
301
302				# If we ONLY have content, make it into a href
303				if (not _is($href, 'HASH')) {
304				$href = { content => $href };
305				}
306
307				# Take linguistic-style rules from the content
308				if (exists $href->{content}) {
309				# _parse_ling() returns where and do
310				($href->{where}, $href->{do}) = _parse_ling($href->{content});
311				unless ($href->{where} && $href->{do}) {
312				err "Couldn't parse linguistic rule '$href->{name}'";
313				$err = 1;
314				next RULE;
315				}
316				}
317
318				# Iterate over elements
319				# code elements
320				for (@code) {
321				next if not exists $href->{$_};
322
323				# If we ONLY have content
324				if (not _is $href->{$_}, 'HASH') {
325				$href->{$_} = { content => $href->{$_} };
326				}
327
328				if ((not exists $href->{$_}->{type}) \|\| $href->{$_}->{type} ne 'plain') {
329				eval { $parm->{$_} = _parse_ext $href->{$_}->{content} };
330				}
331				else {
332				eval { $parm->{$_} = _parse_plain $href->{$_}->{content} };
333				}
334
335				# Always check $@ after parsing
336				if ($@) {
337				err("Error processing rule $href->{name}: $@\n");
338				$err = 1;
339				next RULE;
340				}
341				}
342				# text elements
343				for (@text) {
344				next if not exists $href->{$_};
345				$parm->{$_} = $href->{$_}->{value};
346				}
347				# Future types to be added here
348
349				$self->add_rule($href->{name} => $parm) \|\| do { $err = 1 };
350
351				}
352
353				# Handle ordering rules
354				$self->order( map { [ map { $_->{name} } @{$_->{rule}} ] } @{$parse->{order}} );
355
356				# Handle persistent rules
357				$self->persist( map { $_->{name} } @{$parse->{persist}} );
358
359				return $err ? () : 1;
360				}
361
362				sub _parse_ling {
363				my $str = shift;
364
365				if (not $PARSER) {
366				require Lingua::Phonology::RuleParser;
367				$PARSER = Lingua::Phonology::RuleParser->new();
368				}
369
370				my $parse = $PARSER->Rule($str);
371				return if not $parse;
372				return err "Unbalanced rule" unless @{$parse->{from}} == @{$parse->{to}};
373
374				my (@do, @where); # Holds the statements built
375				my $nulls = 0; # Counts the nulls encountered so far
376
377				# Iterate over $parse->{from}, adding elements to @where and @do
378				for my $i (0 .. $#{$parse->{from}}) {
379				# The FROM item is not '__NULL': we have a real seg and need to make a test statement
380				if ($parse->{from}[$i] ne '__NULL') {
381				my $idx = $i - $nulls;
382				push @where, _test_seg($idx, $parse->{from}[$i]);
383				push @do, _set_seg($idx, $parse->{to}[$i]);
384				}
385				# The FROM item is '__NULL'--insert a segment
386				elsif ($parse->{from}[$i] eq '__NULL') {
387				push @do, _insert_seg($i - $nulls, $parse->{to}[$i]);
388				$nulls++;
389				}
390				}
391
392				# Special case for when we ONLY have nulls in FROM and the next segment is
393				# a boundary. Normally this would generate a $_[0]->BOUNDARY statement, but
394				# that can never succeed, so we roll focus back one seg.
395				my $backstep = 0;
396				if ($nulls == @{$parse->{from}}) {
397				foreach (@{$parse->{when}}) {
398				if ($_->[1][0] eq '__BOUNDARY') {
399				# Redact the existing statements
400				foreach (@do) {
401				s/\[(-?\d+)\]/\[$1 + 1\]/g;
402				}
403				$backstep = 1;
404				last;
405				}
406				}
407				}
408
409				# Build the strings based on the "when" property
410				my @conds;
411				for my $cond (@{$parse->{when}}) {
412				my @thiscond;
413				# The pre '_' segments are in [0], go through them backwards
414				for (my $i = -1; $i >= -@{$cond->[0]}; $i--) {
415				push @thiscond, _test_seg($i + $backstep, $cond->[0][$i]);
416				}
417				# The post '_' statements are in $cond->[1].
418				for my $i (0 .. $#{$cond->[1]}) {
419				my $idx = $i + $backstep + scalar @{$parse->{from}} - $nulls;
420				push @thiscond, _test_seg($idx, $cond->[1][$i]);
421				}
422				push @conds, '(' . join(' && ', @thiscond) . ')';
423				}
424				push @where, '(' . join(' \|\| ', @conds) . ')' if @conds;
425
426				# Final joins - return (where, do)
427				return join(" && ", @where), join("\n", @do);
428				}
429
430				# Segs should be an array ref, a hash ref, '__BOUNDARY', or a string
431				sub _test_seg {
432				my ($i, $seg) = @_;
433				if (ref $seg eq 'ARRAY') {
434				return '(' . join(' \|\| ', map { _test_seg($i, $_) } @$seg) . ')';
435				}
436				elsif (ref $seg eq 'HASH') {
437				# Special case for empty hash - corresponds to "[]" in input, which we
438				# want to be always true, instead of compiling to "()", which would be
439				# always false
440				return 1 if not keys %$seg;
441
442				# General case
443				return '(' . join(' && ', map { _test_feature($i, $_, $seg->{$_}) } keys %$seg) . ')';
444				}
445				elsif ($seg eq '__BOUNDARY') {
446				return "(\$_[$i]->BOUNDARY)";
447				}
448				else {
449				$seg = quotemeta $seg;
450				return "(\$_[$i]->spell eq \"$seg\")";
451				}
452				}
453
454				# Set seg cannot take an array ref, otherwise the same as _test_seg.
455				sub _set_seg {
456				my ($i, $seg) = @_;
457				if (ref $seg eq 'HASH') {
458				return join "\n", map { _set_feature($i, $_, $seg->{$_}) } keys %$seg;
459				}
460				elsif ($seg eq '__NULL') {
461				return "\$_[$i]->DELETE;\n";
462				}
463				else {
464				return "Lingua::Phonology::Functions::change(\$_[$i], \"$seg\");\n";
465				}
466				}
467
468				# Features should be '__TRUE', '__FALSE', a number, or a string
469				sub _test_feature {
470				my ($i, $feat, $val) = @_;
471				no warnings 'numeric';
472
473				if ($val eq '__TRUE') {
474				return "(\$_[$i]->value(\"$feat\"))";
475				}
476				elsif ($val eq '__FALSE') {
477				return "(not \$_[$i]->value(\"$feat\"))";
478				}
479				elsif ($val eq int $val) {
480				return "(\$_[$i]->value(\"$feat\") == $val)";
481				}
482				else {
483				return "(\$_[$i]->value(\"$feat\") eq \"$val\")";
484				}
485				}
486
487				sub _set_feature {
488				my ($i, $feat, $val) = @_;
489				no warnings 'numeric';
490
491				$val = 1 if $val eq '__TRUE';
492				if ($val eq '__FALSE') {
493				return "\$_[$i]->delink(\"$feat\");";
494				}
495				elsif ($val eq int $val) {
496				return "\$_[$i]->value(\"$feat\", $val);";
497				}
498				else {
499				return "\$_[$i]->value(\"$feat\", \"$val\");";
500				}
501				}
502
503				sub _insert_seg {
504				my ($i, $seg) = @_;
505				# Make an unlikely-to-repeat variable name
506				my $var = sprintf "\$new%06d", rand(1_000_000);
507				# Take the new segment from $_[+0], which won't be touched by backstepping
508				my $rv = "my $var = \$_[+0]->new;\n";
509				# Do a normal _set_seg w/ '~' placeholder and then s/// the result
510				$rv .= _set_seg('~', $seg);
511				$rv =~ s/\$_\[~\]/$var/g;
512
513				$rv .= "\$_[$i]->INSERT_LEFT($var);\n";
514				return $rv;
515				}
516
517				sub _to_str {
518				my ($self, $file) = @_;
519
520				require B::Deparse;
521				my $dpar = B::Deparse->new('-x7', '-p', '-si6');
522				$dpar->ambient_pragmas(strict => 'all', warnings => 'all');
523
524				# Hashref structure
525				my $href = { rule => {}, order => { block => [] }, persist => [] } ;
526
527				# Construct href entries for rules
528				for my $rule (keys %{$self->{RULES}}) {
529				# Add our name
530				for (@text) {
531				$href->{rule}->{$rule}->{$_} = { value => $self->$_($rule) }
532				if defined $self->$_($rule);
533				}
534				for (@code) {
535				next if not defined $self->$_($rule);
536				my $str = _deparse_ext $self->$_($rule), $dpar or err($@);
537				$href->{rule}->{$rule}->{$_} = [ $str . ' ' ]; # Extra whitespace to help alignment
538				}
539				}
540
541				# Href entries for order and persist
542				$href->{order}->{block} = [ map { { rule => [ map { { name => $_ } } @$_ ] } } $self->order ];
543				$href->{persist} = { rule => [ map { { name => $_ } } $self->persist ] };
544
545				return eval { _string_from_struct({ rules => $href }) };
546				}
547
548				sub apply {
549				my ($self, $rule, $orig) = @_;
550
551				return err("No such rule '$rule'") unless exists $self->{RULES}->{$rule};
552				return err "Argument not an array reference" unless _is $orig, 'ARRAY';
553
554				# Get the word
555				my ($word, $where, $do, $result)
556				= @{$self->{RULES}->{$rule}}{('word', 'where', 'do', 'result')};
557
558				# Attempt to set this to the word
559				$word->set_segs(@$orig) \|\| return err($@);
560				$word->rule({ map { $_ => $self->$_($rule) } keys %property });
561
562				# Reset the counter
563				$self->{COUNT} = 0;
564
565				# Iterate over the segments
566				while ($word->next) {
567				my @word = $word->get_working_segs;
568				if ($where->(@word)) {
569				# Apply the rule
570				$do->(@word);
571				$self->{COUNT}++;
572				}
573				}
574
575				@$orig = $word->get_orig_segs;
576				$word->clear; # Free up memory
577
578				return @$orig;
579				}
580
581				# Makes rules appliable by their name
582				our $AUTOLOAD;
583				sub AUTOLOAD {
584				my $self = shift;
585				my $method = $AUTOLOAD;
586				$method =~ s/.*:://;
587
588				# For calling rules by name
589				if ($self->{RULES}->{$method}) {
590				# Compile functions which are rules
591				no strict 'refs';
592				*$method = sub {
593				my ($self, $word) = @_;
594				$self->apply($method, $word);
595				};
596
597				# Go to the rule
598				return $self->$method(@_);
599				}
600				die "No such method: $AUTOLOAD, called";
601				}
602
603				sub DESTROY {
604				$_[0]->clear;
605				}
606
607				sub apply_all {
608				my ($self, $word) = @_;
609				my %count = ();
610
611				my @persist = $self->persist; # Only get this once, for speed
612				for ($self->order) {
613				for (@persist) {
614				$self->apply($_, $word);
615				$count{$_} += $self->{COUNT};
616				}
617				for (@$_) {
618				$self->apply($_, $word);
619				$count{$_} += $self->{COUNT};
620				}
621				}
622				for (@persist) {
623				$self->apply($_, $word);
624				$count{$_} += $self->{COUNT};
625				}
626
627				# Set COUNT to be the hashref
628				$self->{COUNT} = \%count;
629
630				}
631
632				sub order {
633				my ($self, @items) = @_;
634				foreach (@items) {
635				$_ = [ $_ ] if not ref $_;
636				}
637				$self->{ORDER} = \@items if @items;
638				return @{$self->{ORDER}};
639				}
640
641				sub persist {
642				my $self = shift;
643				$self->{PERSIST} = \@_ if @_;
644				return @{$self->{PERSIST}};
645				}
646
647				sub count {
648				return $_[0]->{COUNT};
649				}
650
651				1;
652
653				__END__