File Coverage

blib/lib/Lingua/FeatureMatrix.pm
Criterion Covered Total %
statement 15 18 83.3
branch n/a
condition n/a
subroutine 5 6 83.3
pod n/a
total 20 24 83.3


line stmt bran cond sub pod time code
1             package Lingua::FeatureMatrix;
2            
3 1     1   27593 use 5.006;
  1         4  
  1         41  
4 1     1   5 use strict;
  1         1  
  1         26  
5 1     1   3 use warnings;
  1         6  
  1         28  
6 1     1   822 use Graph::Directed;
  1         186859  
  1         27  
7 1     1   9 use Carp;
  1         3  
  1         1686  
8             ##################################################################
9             # package globals
10             our $VERSION = '0.05';
11             ##################################################################
12             # data methods install using Class::MM
13             use Class::MethodMaker
14 0           new_with_init => 'new',
15             new_hash_init => 'hash_init',
16             get_set => [ qw [ _emeType _featureClassType ],
17             qw [_eme_new_opts _fclass_new_opts ] ],
18            
19             get_set => [ 'report', 'graph' ],
20            
21             get_set => 'Name',
22             object => [ Graph::Directed => 'implicature_graph',],
23             get_set => 'fh',
24             # object_list => [ Lingua::FeatureMatrix::Implicature => 'implicatures' ],
25            
26             # Lingua::FeatureMatrix::FeatureClass (or subclass) objs
27             hash => 'featureClasses',
28             # Lingua::FeatureMatrix::Eme subclass objs
29 0     0     hash => 'emes';
  0            
30            
31             use Lingua::FeatureMatrix::Implicature;
32             use Lingua::FeatureMatrix::FeatureClass;
33             ##################################################################
34             sub init {
35             my $self = shift;
36             my ($class) = ref($self);
37            
38             my (%args) = @_;
39            
40             my $file = $args{file};
41             if (not defined $file) {
42             croak "$class must be initialized with a 'file' => name",
43             " or 'file' => filehandle key/value pair";
44             }
45             if (ref($file)) {
46             $self->Name('');
47             if (UNIVERSAL::isa($file, 'IO::Handle') or UNIVERSAL::isa($file, 'GLOB')) {
48             $self->fh($file);
49             }
50             else {
51             croak "file handed in is a ", ref($file),
52             " and apparently not a descendant of IO::Handle!";
53             }
54             }
55             else {
56             $self->Name( $file );
57             require IO::File;
58             # open the filehandle
59             my $fh = IO::File->new($file) or croak "couldn't open $file: $!\n";
60             $self->fh($fh);
61             }
62            
63            
64            
65             if ($args{report}) {
66             $self->report($args{report});
67             }
68             else {
69             $self->report('');
70             }
71            
72             if ($args{graph}) {
73             $self->graph($args{graph});
74             }
75             else {
76             $self->graph('down');
77             }
78            
79             $self->implicature_graph( Graph::Directed->new() );
80             if ($self->graph eq 'down') {
81             $self->implicature_graph->set_attribute(label =>
82             'Feeding and Bleeding ' .
83             'relationships');
84             }
85             elsif ($self->graph eq 'up') {
86             $self->implicature_graph->set_attribute(label =>
87             'Possible but ignored feeding ' .
88             'and bleeding relationships');
89             }
90             else {
91             warn "don't know what the 'graph' parameter " .
92             $self->graph() . " means\n";
93             }
94             $self->implicature_graph->set_attribute(ratio => 1);
95            
96             # set up to know which subclasses to use:
97            
98             # we had better find out what Eme class this is
99             $self->_setEmeType($args{eme}, $args{eme_opts});
100            
101             # we might also have been instructed on a subtype of featureclass to use
102             $self->_setFeatureClass($args{featureclass}, $args{featureclass_opts});
103            
104             # so far, no requirement to be able to subclass the
105             # implicature. Leave it that way for now.
106            
107             # TO DO: add other more direct ways to initialize dynamically if
108             # needed
109             $self->_loadFile(); #$file);
110            
111             # fill out any features based on implicatures
112             $self->_completeSpecifications();
113             }
114             ##################################################################
115             sub _setEmeType {
116             my $self = shift;
117             my $class = ref($self);
118             my $emeType = shift;
119             my $eme_new_opts = shift;
120             if (not defined $emeType) {
121             croak "$class must be initialized with an ( " .
122             "'eme' => Lingua::FeatureMatrix::Eme-subclass-name ) key-value pair";
123             }
124            
125             # make sure that the class specified is loadable
126             # eval "require $emeType";
127             # if ($@) {
128             # croak "trouble loading $emeType: $@; exiting";
129             # }
130             if (not $emeType->isa('Lingua::FeatureMatrix::Eme')) {
131             croak "$emeType (provided as 'eme' parameter to $class)" .
132             " is not a Lingua::FeatureMatrix::Eme!\n";
133             }
134             if ($emeType eq 'Lingua::FeatureMatrix::Eme') {
135             croak "eme parameter to Lingua::FeatureMatrix must be a *derived* " .
136             "subclass, since L::FM::Eme has abstract functions\n";
137             }
138             if (my $error =$emeType->failsContract()) {
139             croak "$emeType fails to meet Lingua::FeatureMatrix::Eme contract: ",
140             $error, "\n";
141             }
142            
143             $self->_emeType( $emeType );
144            
145             # TO DO: undocumented feature allows more powerful subclassing of
146             # Eme objects, by passing more parameters to the Eme-building
147             # routines from this class
148             if (not defined $eme_new_opts) {
149             $self->_eme_new_opts( [] );
150             }
151             elsif (ref($eme_new_opts) eq 'ARRAY') {
152             # it's an arrayref. that's what we want.
153             $self->_eme_new_opts( $eme_new_opts );
154             }
155             elsif ( not ref($eme_new_opts) ) {
156             # it's a scalar. Package it up to be an arrayref anyway.
157             $self->_eme_new_opts( [ $eme_new_opts ] )
158             }
159             else {
160             croak "eme_opts parameter to ", ref($self),
161             " requires arrayref or scalar, not ", ref($eme_new_opts);
162             }
163             } # end _setEmeType
164             ##################################################################
165             sub _setFeatureClass {
166             # TO DO: user-document this undocumented feature, which allows
167             # subclassing of the FeatureClass object. Defaults to the base class
168             # if user ignores it, though.
169             my $self = shift;
170             my $featureclass = shift;
171             my $fClass_opts = shift;
172             if (not defined $featureclass) {
173             $featureclass = 'Lingua::FeatureMatrix::FeatureClass';
174             }
175            
176             if (not $featureclass->isa('Lingua::FeatureMatrix::FeatureClass')){
177             croak "featureclass parameter '$featureclass' to ",
178             ref($self), " not a Lingua::FeatureMatrix::FeatureClass " ,
179             "subclass!";
180             }
181             $self->_featureClassType( $featureclass );
182            
183             if (not defined $fClass_opts) {
184             $self->_fclass_new_opts( [] );
185             }
186             elsif (ref($fClass_opts) eq 'ARRAY') {
187             $self->_fclass_new_opts( $fClass_opts );
188             }
189             elsif ( not ref($fClass_opts) ) {
190             $self->_fclass_new_opts( [ $fClass_opts ] )
191             }
192             else {
193             croak "featureclass_opts parameter to ", ref($self),
194             " requires arrayref or scalar, not ", ref($fClass_opts);
195             }
196             }
197             ##################################################################
198             sub _loadFile {
199             # grabs all lines from config file, strips comments and spaces
200             my $self = shift;
201             my $file = $self->Name();
202            
203             # open(IN,$file) or die "cannot open $file $!\n";
204            
205             my $fh = $self->fh();
206             # sort data into maps, eme-specifications, and class-specifications
207            
208             while (<$fh>) {
209             # clean up lines
210             chomp;
211             $_ = set_utf($_);
212             tr/\x{FEFF}//d;
213             next if /^#/; # drop comments
214             s/\s//g; # and spaces
215             next if not length($_); # and skip blank lines
216            
217             # TO DO: eval the following block, and trap errors. Report them
218             # with line numbers for the user
219            
220             if (/^ \( (.+) \=\> (.+) \) $/x) {
221             # ( +vow => +son )
222             # ( +cons => *tense )
223             $self->_readImplicature($1, $2);
224             }
225             elsif (/^class (\S+) \=\> (.+) $/x) {
226             # class AFF => [ +stop +fric ]
227             $self->_readClass($1, $2);
228             }
229             elsif (/^ (\S+) (\[.*) $/x) {
230             # A [ +vow +low -tense ]
231             $self->_readEme($1, $2);
232             }
233             else {
234             # TO DO: document proper .dat file format
235             die "datafile $file has bad format in line $., '$_'\n";
236             }
237             }
238             close $fh or croak "couldn't close file ", $self->Name();
239             # close (IN) or die "can't close file $file $!\n";
240             }
241             ##################################################################
242             sub _readImplicature {
243             my $self = shift;
244             my $class = ref($self);
245            
246             my ($implier, $implicant) = @_;
247            
248             my %implier = $class->_getFeatureSet($implier);
249             my %implicant = $class->_getFeatureSet($implicant);
250            
251             # TO DO: check the featureset used here against whether it's legal.
252            
253             my $implicature =
254             Lingua::FeatureMatrix::Implicature->new(\%implier, \%implicant);
255            
256             $self->add_implicature($implicature);
257             }
258             ##################################################################
259             sub _readClass {
260             my $self = shift;
261             my $class = ref($self);
262            
263             my ($fClassName, $req_features) = @_;
264            
265             my %required = $class->_getFeatureSet($req_features);
266            
267             my $fClassType = $self->_featureClassType();
268            
269             my $featureClass =
270             $fClassType->new(name => $fClassName, features => \%required);
271             $self->featureClasses( $fClassName => $featureClass );
272             }
273             ##################################################################
274             sub _readEme {
275             my $self = shift;
276             my ($symbol, $features) = @_;
277            
278             if (defined $self->emes($symbol)) {
279             my $eme = $self->emes($symbol);
280             carp "at line $.: but $symbol previously defined as ",
281             $eme->dumpFeaturesToText($eme->listUserSpecified());
282             }
283            
284             my $class = ref($self);
285            
286             my (%features) = $class->_getFeatureSet($features);
287             my $eme =
288             $self->_emeType()->new(name => $symbol,
289             options => $self->_eme_new_opts(),
290             %features,
291             );
292            
293             $self->emes($symbol => $eme);
294             }
295             ##################################################################
296             sub add_implicature {
297             my $self = shift;
298             my Lingua::FeatureMatrix::Implicature $impl = shift;
299             # $self->implicature_graph(
300             my (@otherIndices) =
301             # map {$self->implicature_graph->get_attribute('object', $_)}
302             $self->implicature_graph->vertices();
303             my $insert_index = scalar (@otherIndices);
304            
305             $self->implicature_graph->add_vertex($insert_index);
306             # add a bunch of details about this object:
307            
308             # machine-readable 'object'
309             $self->implicature_graph->set_attribute(object => $insert_index,
310             $impl);
311             # human-readable 'label'
312             $self->implicature_graph->set_attribute(label => $insert_index,
313             $impl->dumpToText);
314            
315             # see how this new implicature fits into the dependency (ordering).
316             foreach my $otherIdx (@otherIndices) {
317             # yes, we have an n-squared scaling here. Tens of thousands of
318             # rules will have problems, but hundreds should still only take
319             # seconds, maximum. I hope. Don't really know how to build this
320             # graph any other way... :-/
321            
322             my $other =
323             $self->implicature_graph->get_attribute(object => $otherIdx);
324            
325             my (@inDeps) = $impl->dependsOn($other);
326             my (@outDeps) = $other->dependsOn($impl);
327            
328             if (@inDeps) {
329             if ($self->graph =~ /down/) {
330             $self->implicature_graph->add_edge($otherIdx, $insert_index);
331             $self->implicature_graph->set_attribute( label =>
332             $otherIdx, $insert_index,
333             (join " ", @inDeps));
334             }
335             }
336             if (@outDeps) {
337             # these are dependencies that suggest that it *could* have
338             # bled/fed rules higher up in the ordering.
339             if ($self->graph =~ /up/) {
340             $self->implicature_graph->add_edge($insert_index, $otherIdx);
341             $self->implicature_graph->set_attribute( label =>
342             $insert_index, $otherIdx,
343             (join " ", @outDeps));
344             }
345             if ($self->report() eq 'back-dependencies') {
346             carp $impl->dumpToText(), "(implicature number $insert_index)",
347             " could have been applied before ", $other->dumpToText(),
348             " (implicature number $otherIdx)",
349             " despite their input in the other order.";
350             }
351             }
352             } #end foreach otheridx
353            
354            
355             }
356             ##################################################################
357             sub _completeSpecifications {
358            
359             my $self = shift;
360            
361             # my $ordered_impls =
362             # Lingua::FeatureMatrix::Implicature->order
363            
364             # {
365             # my @ordered =
366             # Lingua::FeatureMatrix::Implicature->sortByRuleOrder($self->implicatures());
367             # $self->implicatures_clear();
368             # $self->implicatures_push(@ordered);
369             # }
370            
371             my (@orderedImpls) = $self->orderImplicatures();
372            
373             foreach my $emeName (sort $self->emes_keys) {
374            
375             my $eme = $self->emes($emeName);
376             # future might consider a toposort here...
377             foreach my $implicature (@orderedImpls) {
378             if ( $implicature->matches( $eme ) ) {
379             $implicature->apply( $eme );
380             }
381             }
382            
383             my (@missing) = $self->emes($emeName)->listUnspecified();
384             if (@missing) {
385             warn $self->_emeType . " '$emeName' " .
386             "(a Lingua::FeatureMatrix::Eme subclass) " .
387             "was not fully specified after application of implicatures " .
388             "(missing feature(s) [ @missing ]).\n";
389             }
390            
391             }
392             }
393             #################################################################
394             sub _getFeatureSet {
395             # returns a hash of ( feature1 => 1, feature2 => 0, feature3 => undef )
396             # style data, given "[+feature1 -feature2 *feature3]" style string
397             my $class = shift;
398             my $featureset = shift;
399            
400             if (not defined $featureset) {
401             confess;
402             }
403            
404             $featureset =~ s/^\[//; # remove leading & trailing brackets
405             $featureset =~ s/\]$//;
406            
407             # assumes no features use + or - within their names.
408             my (@featureData) = split( /([+*-])/, $featureset);
409            
410             @featureData = grep {$_ ne ''} @featureData;
411            
412             if ( not (@featureData % 2) ) {
413             # odd
414             }
415            
416             @featureData = map { $_ eq '-' ? 0 : $_ } @featureData;
417             @featureData = map { $_ eq '+' ? 1 : $_ } @featureData;
418             @featureData = map { $_ eq '*' ? (undef $_) : $_ } @featureData;
419            
420             # reversing the @featureData list puts keys before values, instead
421             # of the linguistic standard of value/key
422             # (e.g. +vow becomes vow => 1)
423             # note earlier listings override later
424             return reverse @featureData;
425             }
426             ##################################################################
427             # main public access method.
428             sub matchesFeatureClass {
429             my $self = shift;
430             my ($class) = ref($self);
431             my $symbol = shift;
432             my $className = shift;
433            
434             my $eme = $self->emes($symbol);
435             croak "unrecognized $class symbol $symbol" unless defined $eme;
436            
437             my $featureClass = $self->featureClasses($className);
438             croak "unrecognized class $className" unless defined $featureClass;
439            
440             return $featureClass->matches($eme);
441             }
442             ##################################################################
443             # public (probably debugging) method
444             sub listFeatureClassMembers {
445             my $self = shift;
446             my $className = shift;
447            
448             my $featureClass = $self->featureClasses($className);
449             croak "unrecognized class $className" unless defined $featureClass;
450            
451             my @symbols;
452            
453             foreach my $symbol (sort $self->emes_keys()) {
454             if ( $featureClass->matches( $self->emes($symbol) ) ) {
455             push @symbols, $symbol;
456             }
457             }
458             return @symbols;
459             }
460             ##################################################################
461             sub orderImplicatures {
462             # return "ordered" list of implicatures.
463             my $self = shift;
464            
465             # "By Any Means Necessary" --Malcolm X
466            
467            
468             return
469             map {$self->implicature_graph->get_attribute('object',$_)}
470             sort { $a <=> $b } $self->implicature_graph->vertices();
471            
472             # Schwartzian Transform is a good means.
473             # map {$_->[0]} # (3) strip sort index
474             # sort { $a->[1] <=> $b->[1] } # (2) sort items by the sort index
475             # map { [$_ => # (1) get a sort index for each
476             # $self->implicature_graph->get_attribute('insert_index', $_)] }
477             # $self->implicature_graph->vertices();
478            
479            
480             # future improvements will include a toposort call.
481            
482            
483             }
484             ##################################################################
485             sub findEquivalentEmes {
486             my $self = shift;
487             my (@symbols) = $self->emes_keys();
488            
489             # wantarray returns undef in void context
490             if (not defined wantarray()) {
491             carp "useless call to findEquivalentEmes() in void context";
492             return; # don't bother doing any work
493             }
494            
495             my %problems;
496             while (@symbols) {
497             my $thisSymbol = shift @symbols;
498             my $thisEme = $self->emes($thisSymbol);
499             foreach my $otherSymbol (@symbols) {
500             if ($thisEme->isEquivalent($self->emes($otherSymbol))) {
501             $problems{$thisSymbol} = $otherSymbol;
502             }
503             # else this eme not equivalent to any remaining eme
504             } # end foreach othersymbol
505             } # end while @symbols remaining
506            
507             if (wantarray()) {
508             return %problems;
509             }
510             else {
511             return (scalar(keys %problems));
512             }
513             } #end findEquivalentEmes
514             ##################################################################
515             # debugging public method
516             sub dumpToText {
517             # debugging function
518             my $self = shift;
519             my $lineLength = shift;
520            
521             local $Text::Wrap::columns = (defined $lineLength ?
522             $lineLength : $Text::Wrap::columns);
523             my (@text);
524             # loop over emes, dumping each one into filename
525             use Text::Wrap;
526             foreach my $symbol (sort $self->emes_keys) {
527             my $emeText = $self->emes($symbol)->dumpToText();
528            
529             my $line = $symbol . "\t" . $emeText;
530            
531             push @text, ($Text::Wrap::columns ?
532             wrap ('', "\t ", $line) : $line);
533             }
534            
535             return join ("\n", @text);
536             }
537             ##################################################################
538             sub set_utf {
539             # thanks to perlmonks' grantm
540             # (http://www.perlmonks.org/index.pl?node=grantm) for saving me
541             return pack "U0a*", join '', @_;
542             }
543             ##################################################################
544             1;
545            
546             __END__