File Coverage

blib/lib/WordNet/Similarity/GlossFinder.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1             # WordNet::Similarity::GlossFinder version 2.04
2             # (Last updated $Id: GlossFinder.pm,v 1.13 2008/03/27 06:21:17 sidz1979 Exp $)
3             #
4             # Module containing gloss-finding code for the various measures of semantic
5             # relatedness (lesk, vector).
6             #
7             # Copyright (c) 2005,
8             #
9             # Ted Pedersen, University of Minnesota Duluth
10             # tpederse at d.umn.edu
11             #
12             # Siddharth Patwardhan, University of Utah, Salt Lake City
13             # sidd at cs.utah.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31             #
32             # ------------------------------------------------------------------
33              
34             package WordNet::Similarity::GlossFinder;
35              
36             =head1 NAME
37              
38             WordNet::Similarity::GlossFinder - module to implement gloss finding methods
39             for WordNet::Similarity measures of semantic relatedness (specifically, lesk
40             and vector)
41              
42             =head1 SYNOPSIS
43              
44             use WordNet::QueryData;
45             my $wn = WordNet::QueryData->new;
46             defined $wn or die "Construction of WordNet::QueryData failed";
47              
48             use WordNet::Similarity::GlossFinder;
49             my $obj = WordNet::Similarity::GlossFinder->new ($wn);
50             my ($err, $errString) = $obj->getError ();
51             $err and die $errString;
52              
53             my $wps1 = 'england#n#1';
54             my $wps2 = 'winston_churchill#n#1';
55              
56             # get the glosses of these two synsets, since we are not using a
57             # configuation file to specify relations, we will only get the
58             # immediate glosses of the two wps entries. The default weight and
59             # relation appear in $weight and $relation - these can be modified
60             # via a configuration file.
61              
62             my ($wps1gloss, $wps2gloss, $weight, $relation ) = $obj -> getSuperGlosses ($wps1, $wps2);
63             print "$wps1gloss->[0]\n";
64             print "$wps2gloss->[0]\n";
65             print "$weight->[0]\n";
66             print "$relation->[0]\n";
67              
68             =head1 DESCRIPTION
69              
70             =head2 Introduction
71              
72             This class is derived from (i.e., is a sub-class of) WordNet::Similarity. Two
73             of the measures of similarity, provided in this package, viz. WordNet::Similarity::lesk
74             and WordNet::Similarity::vector deal with WordNet glosses. This module provides
75             methods for easy access to the required glosses.
76              
77             =head2 Methods
78              
79             This module inherits all the methods of WordNet::Similarity. Additionally,
80             the following methods are also defined.
81              
82             =head3 Public methods
83              
84             =over
85              
86             =cut
87              
88 1     1   5 use strict;
  1         2  
  1         21  
89 1     1   4 use warnings;
  1         2  
  1         28  
90 1     1   45 use WordNet::Similarity;
  0            
  0            
91             use File::Spec;
92             use WordNet::get_wn_info;
93              
94             our @ISA = qw/WordNet::Similarity/;
95              
96             our $VERSION = '2.04';
97              
98             WordNet::Similarity::addConfigOption("relation", 0, "p", undef);
99             WordNet::Similarity::addConfigOption("stop", 0, "p", undef);
100             WordNet::Similarity::addConfigOption("stem", 0, "i", 0);
101              
102             =item $measure->setPosList(Z<>)
103              
104             Specifies the parts of speech that measures derived from this module
105             support (namely, nouns, verbs, adjectives and adverbs).
106              
107             parameters: none
108              
109             returns: true
110              
111             =cut
112              
113             sub setPosList
114             {
115             my $self = shift;
116             $self->{n} = 1;
117             $self->{v} = 1;
118             $self->{a} = 1;
119             $self->{r} = 1;
120             return 1;
121             }
122              
123             =item $self->traceOptions(Z<>)
124              
125             Overrides method of same name in WordNet::Similarity. Prints module-specific
126             configuration options to the trace string (if tracing is on). GlossFinder
127             supports module specific options: relation, stop and stem.
128              
129             Parameters: none
130              
131             Returns: nothing
132              
133             =cut
134              
135             sub traceOptions
136             {
137             my $self = shift;
138             $self->{traceString} .= "relation file :: ".((defined $self->{relation}) ? ($self->{relation}) : "")."\n";
139             $self->{traceString} .= "stopwords file :: ".((defined $self->{stop}) ? ($self->{stop}) : "")."\n";
140             $self->{traceString} .= "stem :: ".((defined $self->{stem}) ? ($self->{stem}) : "")."\n";
141             $self->SUPER::traceOptions();
142             }
143              
144             =item $self->configure($file)
145              
146             Overrides the configure method in WordNet::Similarity. This method loads
147             various data files, such as the stop words and relations.
148              
149             Parameters: $file -- path of the configuration file.
150              
151             Returns: nothing
152              
153             =cut
154              
155             sub configure
156             {
157             my $self = shift;
158             my $class = ref $self || $self;
159             my %stopHash;
160             my $gwi;
161              
162             # Call the configure method in parent (WordNet::Similarity)
163             $self->SUPER::configure(@_);
164             $self->{maxCache} = 5000;
165            
166             # Initialize the stop list.
167             $self->{stopHash} = {};
168             my $wn = $self->{wn};
169            
170             # Use default relation file if specified by module...
171             $self->{relation} = $self->{relationDefault}
172             if(!($self->{relation}) && defined $self->{relationDefault} && $self->{relationDefault} ne "");
173              
174             # Load the stop list.
175             if(defined $self->{stop})
176             {
177             my $line;
178             my $stopFile = $self->{stop};
179              
180             if(open(STOP, $stopFile))
181             {
182             while($line = )
183             {
184             $line =~ s/[\r\f\n]//g;
185             $line =~ s/^\s+//;
186             $line =~ s/\s+$//;
187             $line =~ s/\s+/_/g;
188             $stopHash{$line} = 1;
189             $self->{stopHash}->{$line} = 1;
190             }
191             close(STOP);
192             }
193             else
194             {
195             $self->{errorString} .= "\nWarning ($class->configure()) - ";
196             $self->{errorString} .= "Unable to open $stopFile.";
197             $self->{error} = 1 if($self->{error} < 1);
198             }
199             }
200              
201             # so now we are ready to initialize the get_wn_info package with
202             # the wordnet object, 0/1 depending on if stemming is required and
203             # the stop hash
204             if($self->{stem})
205             {
206             $gwi = WordNet::get_wn_info->new($wn, 1, %stopHash);
207             $self->{gwi} = $gwi;
208             }
209             else
210             {
211             $gwi = WordNet::get_wn_info->new($wn, 0, %stopHash);
212             $self->{gwi} = $gwi;
213             }
214              
215             # Load the relations
216             $self->_loadRelationFile();
217              
218             # Initialize traces for relations...
219             $self->{relationTraces} = [];
220             my $i = 0;
221             while(defined $self->{functions}->[$i])
222             {
223             my $functionsString = "";
224             my $weight = $self->{weights}->[$i];
225            
226             # see if any traces reqd. if so, create the functions string
227             # however don't send it to the trace string immediately - will
228             # print it only if there are any overlaps for this rel pair
229             $functionsString = "Functions: ";
230             my $j = 0;
231             while(defined $self->{functions}->[$i]->[0]->[$j])
232             {
233             $functionsString .= ($self->{functions}->[$i]->[0]->[$j])." ";
234             $j++;
235             }
236              
237             $functionsString .= "- ";
238             $j = 0;
239             while(defined $self->{functions}->[$i]->[1]->[$j])
240             {
241             $functionsString .= ($self->{functions}->[$i]->[1]->[$j])." ";
242             $j++;
243             }
244             push(@{$self->{relationTraces}}, $functionsString);
245             $i++;
246             }
247             }
248              
249             =item $self->getSuperGlosses($wps1, $wps2)
250              
251             This method returns a list of large blocks of concatenated glosses (super-gloss) for
252             each specified synset. A super-gloss is the block of text formed by concatenating the
253             glosses of a synset with glosses of synsets related to it in WordNet. "Related"
254             synsets are identified by specific relations specified in the "relations" file.
255             If no relations file was specified in the configuration, only the gloss of that
256             synset is returned.
257              
258             Parameters: wps1 and wps2 -- two synsets.
259              
260             Returns: List of superglosses for both synsets (2-D array).
261              
262             =cut
263              
264             sub getSuperGlosses
265             {
266             my $self = shift;
267             my $wps1 = shift;
268             my $wps2 = shift;
269             my $class = ref $self || $self;
270             my $rArray = [];
271             my $gwi = $self->{gwi};
272              
273             # NOTE: Thanks to Wybo Wiersma for providing the following (faster)
274             # super-gloss code.
275              
276             # check if the supergloss of the left word is in the cache.
277             # If it is not, add it.
278             if(!defined($self->{cache}->[0]->{$wps1}))
279             {
280             push(@{$self->{cachelist}->[0]}, $wps1);
281              
282             # Remove the oldest cache-entry if there's no more room
283             if(scalar(@{$self->{cachelist}->[0]}) > $self->{maxCache})
284             {
285             my $todel = shift(@{$self->{cachelist}->[0]});
286             delete ($self->{cache}->[0]->{$todel});
287             }
288            
289             $self->{cache}->[0]->{$wps1} = $self->_getSuperGlosses($wps1, $gwi, 0);
290             }
291            
292             # check if the supergloss of the right word is in the cache.
293             # If it is not, add it.
294             if(!defined($self->{cache}->[1]->{$wps2}))
295             {
296             push(@{$self->{cachelist}->[1]}, $wps2);
297              
298             # Remove the oldest cache-entry if there's no more room
299             if(scalar(@{$self->{cachelist}->[1]}) > $self->{maxCache})
300             {
301             my $todel = shift(@{$self->{cachelist}->[1]});
302             delete ($self->{cache}->[1]->{$todel});
303             }
304            
305             $self->{cache}->[1]->{$wps2} = $self->_getSuperGlosses($wps2, $gwi, 1);
306             }
307            
308             return ($self->{cache}->[0]->{$wps1}, $self->{cache}->[1]->{$wps2}, $self->{weights}, $self->{relationTraces});
309             }
310              
311             sub _getSuperGlosses()
312             {
313             my $self = shift;
314             my ($wps, $gwi, $zron) = @_;
315             my @stringArray;
316              
317             # and now go thru the functions array, get the strings
318             my $i = 0;
319             while(defined $self->{functions}->[$i])
320             {
321             # now get the string for the first set of synsets
322             my %seth = ();
323             $seth{$wps} = 1;
324             my @arguments = \%seth;
325            
326             # apply the functions to the arguments, passing the output of
327             # the inner functions to the inputs of the outer ones
328             my $j = 0;
329             while(defined $self->{functions}->[$i]->[$zron]->[$j])
330             {
331             my $fn = $self->{functions}->[$i]->[$zron]->[$j];
332             @arguments = $gwi->$fn(@arguments);
333             $j++;
334             }
335            
336             # finally we should have one cute little string!
337             push(@stringArray, $arguments[0]);
338             $i++;
339             }
340              
341             return \@stringArray;
342             }
343              
344             =back
345              
346             =head3 Private Methods
347              
348             =over
349              
350             =item $self->_loadRelationFile()
351              
352             This method loads relations from a relation file.
353              
354             Parameters: none
355              
356             Returns: nothing
357              
358             =back
359              
360             =cut
361              
362             sub _loadRelationFile
363             {
364             my $self = shift;
365             my $class = ref $self || $self;
366             my $gwi = $self->{gwi};
367              
368             if($self->{relation})
369             {
370             my $header;
371             my $relation;
372              
373             if(open (RELATIONS, $self->{relation}))
374             {
375             $header = ;
376             $header =~ s/[\r\f\n]//g;
377             $header =~ s/\s+//g;
378             if(defined $header && $header =~ /RelationFile/)
379             {
380             my $index = 0;
381             $self->{functions} = ();
382             $self->{weights} = ();
383             while($relation = )
384             {
385             $relation =~ s/[\r\f\n]//g;
386            
387             # now for each line in the file, extract the
388             # nested functions if any, check if they are defined,
389             # if it makes sense to nest them, and then finally put
390             # them into the @functions triple dimensioned array!
391            
392             # remove leading/trailing spaces from the relation
393             $relation =~ s/^\s*(\S*?)\s*$/$1/;
394              
395             next if($relation =~ /^$/);
396            
397             # now extract the weight if any. if no weight, assume 1
398             if($relation =~ /(\S+)\s+(\S+)/)
399             {
400             $relation = $1;
401             $self->{weights}->[$index] = $2;
402             }
403             else
404             {
405             $self->{weights}->[$index] = 1;
406             }
407              
408             # check if we have a "proper" relation, that is a relation in
409             # there are two blocks of functions!
410             if($relation !~ /(.*)-(.*)/)
411             {
412             $self->{errorString} .= "\nError (${class}::_loadRelationFile()) - ";
413             $self->{errorString} .= "Bad file format ($self->{relation}).";
414             $self->{error} = 2;
415             close RELATIONS;
416             return;
417             }
418            
419             # get the two parts of the relation pair
420             my @twoParts;
421             my $l;
422             $twoParts[0] = $1;
423             $twoParts[1] = $2;
424            
425             # process the two parts and put into functions array
426             for($l = 0; $l < 2; $l++)
427             {
428             #no strict 'subs';
429            
430             $twoParts[$l] =~ s/[\s\)]//g;
431             my @functionArray = split(/\(/, $twoParts[$l]);
432            
433             my $j = 0;
434             my $fn = $functionArray[$#functionArray];
435             unless($gwi->can($fn))
436             {
437             $self->{errorString} .= "\nError (${class}::_loadRelationFile()) - ";
438             $self->{errorString} .= "Undefined function ($functionArray[$#functionArray]) in relations file.";
439             $self->{error} = 2;
440             close RELATIONS;
441             return;
442             }
443            
444             $self->{functions}->[$index]->[$l]->[$j++] = $functionArray[$#functionArray];
445             my $input;
446             my $output;
447             my $dummy;
448             my $k;
449            
450             for ($k = $#functionArray-1; $k >= 0; $k--)
451             {
452             my $fn2 = $functionArray[$k];
453             my $fn3 = $functionArray[$k+1];
454             if(!($gwi->can($fn2)))
455             {
456             $self->{errorString} .= "\nError (${class}::_loadRelationFile()) - ";
457             $self->{errorString} .= "Undefined function ($functionArray[$k]) in relations file.";
458             $self->{error} = 2;
459             close(RELATIONS);
460             return;
461             }
462            
463             ($input, $dummy) = $gwi->$fn2($dummy, 1);
464             ($dummy, $output) = $gwi->$fn3($dummy, 1);
465            
466             if($input != $output)
467             {
468             $self->{errorString} .= "\nError (${class}::_loadRelationFile()) - ";
469             $self->{errorString} .= "Invalid function combination - $functionArray[$k]($functionArray[$k+1]).";
470             $self->{error} = 2;
471             close(RELATIONS);
472             return;
473             }
474            
475             $self->{functions}->[$index]->[$l]->[$j++] = $functionArray[$k];
476             }
477            
478             # if the output of the outermost function is synset array (1)
479             # wrap a glos around it
480             my $xfn = $functionArray[0];
481             ($dummy, $output) = $gwi->$xfn($dummy, 1);
482             if($output == 1)
483             {
484             $self->{functions}->[$index]->[$l]->[$j++] = "glos";
485             }
486             }
487            
488             $index++;
489             }
490             }
491             else
492             {
493             $self->{errorString} .= "\nError (${class}::_loadRelationFile()) - ";
494             $self->{errorString} .= "Bad file format ($self->{relation}).";
495             $self->{error} = 2;
496             close(RELATIONS);
497             return;
498             }
499             close(RELATIONS);
500             }
501             else
502             {
503             $self->{errorString} .= "\nError (${class}::_loadRelationFile()) - ";
504             $self->{errorString} .= "Unable to open $self->{relation}.";
505             $self->{error} = 2;
506             return;
507             }
508             }
509             else
510             {
511             $self->{weights}->[0] = 1;
512             $self->{functions}->[0]->[0]->[0] = "glosexample";
513             $self->{functions}->[0]->[1]->[0] = "glosexample";
514             return;
515             }
516             }
517              
518             1;
519              
520             __END__