File Coverage

blib/lib/WordNet/Similarity.pm
Criterion Covered Total %
statement 28 30 93.3
branch n/a
condition n/a
subroutine 10 10 100.0
pod n/a
total 38 40 95.0


line stmt bran cond sub pod time code
1             # WordNet::Similarity.pm version 2.05
2             # (Last updated $Id: Similarity.pm,v 1.51 2015/10/04 15:49:59 tpederse Exp $)
3             #
4             # Module containing the version information and pod
5             # for the WordNet::Similarity package, and all measures are
6             # derived from this class.
7             #
8             # Copyright (c) 2005,
9             #
10             # Ted Pedersen, University of Minnesota Duluth
11             # tpederse at d.umn.edu
12             #
13             # Siddharth Patwardhan, University of Utah, Salt Lake City
14             # sidd at cs.utah.edu
15             #
16             # Jason Michelizzi, Univeristy of Minnesota Duluth
17             # mich0212 at d.umn.edu
18             #
19             # Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh
20             # banerjee+ at cs.cmu.edu
21             #
22             # This program is free software; you can redistribute it and/or
23             # modify it under the terms of the GNU General Public License
24             # as published by the Free Software Foundation; either version 2
25             # of the License, or (at your option) any later version.
26             #
27             # This program is distributed in the hope that it will be useful,
28             # but WITHOUT ANY WARRANTY; without even the implied warranty of
29             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30             # GNU General Public License for more details.
31             #
32             # You should have received a copy of the GNU General Public License
33             # along with this program; if not, write to
34             #
35             # The Free Software Foundation, Inc.,
36             # 59 Temple Place - Suite 330,
37             # Boston, MA 02111-1307, USA.
38             #
39             # ------------------------------------------------------------------
40              
41             package WordNet::Similarity;
42              
43             =head1 NAME
44              
45             WordNet::Similarity - Perl modules for computing measures of semantic
46             relatedness.
47              
48             =head1 SYNOPSIS
49              
50             =head2 Basic Usage Example
51              
52             use WordNet::QueryData;
53              
54             use WordNet::Similarity::path;
55              
56             my $wn = WordNet::QueryData->new;
57              
58             my $measure = WordNet::Similarity::path->new ($wn);
59              
60             my $value = $measure->getRelatedness("car#n#1", "bus#n#2");
61              
62             my ($error, $errorString) = $measure->getError();
63              
64             die $errorString if $error;
65              
66             print "car (sense 1) <-> bus (sense 2) = $value\n";
67              
68             =head2 Using a configuration file to initialize the measure
69              
70             use WordNet::Similarity::path;
71              
72             my $sim = WordNet::Similarity::path->new($wn, "mypath.cfg");
73              
74             my $value = $sim->getRelatedness("dog#n#1", "cat#n#1");
75              
76             ($error, $errorString) = $sim->getError();
77              
78             die $errorString if $error;
79              
80             print "dog (sense 1) <-> cat (sense 1) = $value\n";
81              
82             =head2 Printing traces
83              
84             print "Trace String -> ".($sim->getTraceString())."\n";
85              
86             =head1 DESCRIPTION
87              
88             =head2 Introduction
89              
90             We observe that humans find it extremely easy to say if two words are
91             related and if one word is more related to a given word than another. For
92             example, if we come across two words, 'car' and 'bicycle', we know they
93             are related as both are means of transport. Also, we easily observe that
94             'bicycle' is more related to 'car' than 'fork' is. But is there some way to
95             assign a quantitative value to this relatedness? Some ideas have been put
96             forth by researchers to quantify the concept of relatedness of words, with
97             encouraging results.
98              
99             Eight of these different measures of relatedness have been implemented in
100             this software package. A simple edge counting measure and a random measure
101             have also been provided. These measures rely heavily on the vast store of
102             knowledge available in the online electronic dictionary -- WordNet. So, we
103             use a Perl interface for WordNet called WordNet::QueryData to make it
104             easier for us to access WordNet. The modules in this package REQUIRE that
105             the WordNet::QueryData module be installed on the system before these
106             modules are installed.
107              
108             =head2 Function
109              
110             The following function is defined:
111              
112             =over
113              
114             =cut
115              
116 19     19   11425 use strict;
  19         31  
  19         485  
117 19     19   85 use Carp;
  19         29  
  19         1472  
118 19     19   85 use Exporter;
  19         29  
  19         708  
119              
120             # please use these, but remember that constants are not interpolated:
121             # print "Root: ROOT\n"; # wrong!
122             # print "Root: ".ROOT."\n"; # right
123             # m/ROOT/; # wrong!
124             # $pattern = ROOT; m/$pattern/; # okay
125 19     19   90 use constant ROOT => "*Root*";
  19         88  
  19         1499  
126 19     19   88 use constant ROOT_N => "*Root*#n#1";
  19         41  
  19         895  
127 19     19   84 use constant ROOT_V => "*Root*#v#1";
  19         26  
  19         863  
128              
129             # JM 12/9/03
130             # we would like this to be numeric
131 19     19   86 use constant UNRELATED => -1_000_000;
  19         27  
  19         1242  
132              
133             # if we are using an unlimited cache size, it's easier to fake an
134             # unlimited cache with a really big value.
135 19     19   95 use constant UNLIMITED_CACHE => 2_147_483_647;
  19         35  
  19         841  
136              
137 19     19   90 use constant DEFAULT_CACHE => 5_000;
  19         38  
  19         811  
138              
139 19     19   8699 use WordNet::Tools;
  0            
  0            
140              
141             our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
142              
143             @ISA = qw(Exporter);
144              
145             %EXPORT_TAGS = ();
146              
147             @EXPORT_OK = ();
148              
149             @EXPORT = ();
150              
151             $VERSION = '2.07';
152              
153             # a hash to contain the module-specific configuration options.
154             our %config_options;
155              
156             =item addConfigOption ($name, $required, $type, $default_val)
157              
158             Adds the configuration option, $name, to the list of known config
159             options (cf. configure()). If $required is true, then the option
160             requires a value; otherwise, the value is optional, and the default
161             value $default_val is used if a value is not specified in the config
162             file. $type is the type of value the option takes. It can be
163             'i' for integer, 'f' for floating-point, 's' for string, or 'p' for
164             a file name.
165              
166             returns: nothing, but will C on error. You can put the call to
167             this function in an C block to trap the exception (N.B., the
168             C form of C does not significantly degrade performance,
169             unlike the C form of C. See C).
170              
171             =cut
172              
173             sub addConfigOption
174             {
175             my $name = shift;
176             my $required = shift;
177             my $type = shift;
178             my $default = shift;
179              
180             my ($package, $filename, $line) = caller;
181             if ($package eq 'vector') {
182             print "vector\n"
183             }
184              
185             $config_options{$name}->{$package} = [($required ? 1 : 0), $type, $default];
186             }
187              
188             =back
189              
190             =head2 Methods
191              
192             The following methods are defined in this package:
193              
194             =head3 Public methods
195              
196             =over
197              
198             =item $obj->new ($wn, $config_file)
199              
200             The constructor for WordNet::Similarity::* objects.
201              
202             Parameters: $wn is a WordNet::QueryData object, $config_file is a
203             configuration file (optional).
204              
205             Return value: the new blessed object
206              
207             =cut
208              
209             sub new
210             {
211             my $class = shift;
212             my $this = {};
213              
214             $class = ref $class || $class;
215              
216             $this->{errorString} = '';
217             $this->{error} = 0;
218              
219             if ($class eq 'WordNet::Similarity') {
220             $this->{errorString} .= "\nWarning (${class}::new()) - ";
221             $this->{errorString} .= "This class is intended to be an abstract base class for a measure. Your class should override it.";
222             $this->{error} = 1;
223             }
224              
225             $this->{wn} = shift;
226             unless (defined $this->{wn}) {
227             $this->{errorString} .= "\nError (${class}::new()) - ";
228             $this->{errorString} .= "A WordNet::QueryData object is required.";
229             $this->{error} = 2;
230             }
231             else {
232             # queryWord() in older versions of WordNet::QueryData was broken
233             $this->{wn}->VERSION (1.30);
234             my $wntools = WordNet::Tools->new($this->{wn});
235             unless (defined $wntools) {
236             $this->{errorString} .= "\nError (${class}::new()) - ";
237             $this->{errorString} .= "Error creating WordNet::Tools object.";
238             $this->{error} = 2;
239             }
240             $this->{wntools} = $wntools;
241             }
242              
243             bless $this, $class;
244              
245             $this->initialize (shift) if $this->{error} < 2;
246              
247             $this->setPosList();
248              
249             # [trace]
250             if ($this->{trace}) {
251             $this->{traceString} = "${class} object created:\n";
252             $this->{traceString} .= "trace :: $this->{trace}\n";
253             $this->{traceString} .= "cache :: $this->{doCache}\n";
254             $this->{traceString} .= "cache size :: $this->{maxCacheSize}\n";
255             $this->traceOptions ();
256             }
257             # [/trace]
258              
259             return $this;
260             }
261              
262              
263             =item $obj->initialize ($config_file)
264              
265             Performs some initialization on the module.
266              
267             Parameter: the location of a configuration file
268              
269             Returns: nothing
270              
271             =cut
272              
273             sub initialize
274             {
275             my $self = shift;
276              
277             # initialize cache--caching is ON by default
278             $self->{doCache} = 1;
279             $self->{simCache} = ();
280             $self->{traceCache} = ();
281             $self->{cacheQ} = ();
282              
283             # (JM - 11/26/03)
284             # Using unlimited cache can cause simCache and esp. traceCache
285             # to use huge amounts of memory if a lot of queries are performed.
286             # $self->{maxCacheSize} = UNLIMITED_CACHE;
287              
288             $self->{maxCacheSize} = DEFAULT_CACHE;
289              
290             # initialize tracing--tracing is OFF by default
291             $self->{trace} = 0;
292              
293             # JM 1/26/04
294             # moved option for root node to PathFinder.pm
295             #
296             # use a virtual root node (if applicable)
297             # six of the measures (res, lin, jcn, path, wup, lch) use a virtual
298             # root node in some way, and it is present by default in these cases.
299             # Three of the measures--path, wup, and lch--allow this root node to be
300             # turned off (i.e., the measure would be run without a root node).
301             # $self->{rootNode} = 1;
302              
303             return $self->configure (@_);
304             }
305              
306              
307             =item $obj->configure($config_file)
308              
309             Parses a configuration file.
310              
311             If you write a module and want to add a new configuration option, you
312             can use the addConfigOption function to specify the name and nature
313             of the option.
314              
315             The value of the option is place in "self": $self->{optionname}.
316              
317             parameter: a file name
318              
319             returns: true if parsing of config file was successful, false on error
320              
321             =cut
322              
323             sub configure
324             {
325             my $self = shift;
326             my $file = shift;
327             my $class = ref $self || $self;
328              
329             while (my ($opt, $classHash) = each %config_options) {
330             while (my ($class, $arrayRef) = each %$classHash) {
331             if ($self->isa ($class)) {
332             $self->{$opt} = $arrayRef->[2];
333             }
334             }
335             }
336             return unless defined $file;
337              
338             # my %options = %config_options;
339             # foreach my $optionstr (@options) {
340             # next unless $optionstr;
341             # my $patternstr = '^(\w+)([=:])([ifps])$';
342             # unless ($optionstr =~ m/$patternstr/o) {
343             # $self->{errorString} .= "\nWarning (${class}::configure) - ";
344             # $self->{errorString} .= "Bad option string $optionstr: option strings";
345             # $self->{errorString} .= " must match the pattern ${patternstr}.";
346             # $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
347             # }
348             # $options{$1} = [$2, $3];
349             # }
350              
351             unless (open CF, $file) {
352             my $class = ref $self || $self;
353             $self->{errorString} .= "\nError (${class}::configure) - ";
354             $self->{errorString} .= "Unable to open config file $file.";
355             $self->{error} = 2;
356             return undef;
357             }
358              
359             $_ = ;
360             unless (m/^$class/) {
361             close CF;
362             my $class = ref $self || $self;
363             $self->{errorString} .= "\nError (${class}::configure()) - ";
364             $self->{errorString} .= "$file does not appear to be a config file for $class.";
365             $self->{error} = 2;
366             return undef;
367             }
368              
369             # keep track of which options we've already seen
370             my %optionCache;
371              
372             my %rtn;
373             OPTION_READ:
374             while () {
375             s/\s+|\#.*//g; # ignore comments
376              
377             # JM 12/4/03 (#3)
378             # edited the below to
379             # (1) ensure the values for options are valid
380             # (2) handle options without values in a consistent manner
381              
382             if ($_ eq "") {
383             next;
384             }
385              
386             # JM 1/9/04 (#1)
387             # added the following block to check for repeated options
388             my ($option, $value) = m/^(\w+)::(.*)$/;
389             if ($option) {
390             unless (defined $optionCache{$option}) {
391             $optionCache{$option} = defined $value ? $value : 1;
392             }
393             else {
394             # we've already seen this option
395             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
396             $self->{errorString} .= "configuration option '$option' encountered twice in config file";
397             $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
398             }
399             }
400              
401             if (m/^trace::(.*)/i) {
402             my $trace = $1;
403              
404             # JM 12/4/03 (#3)
405             # $self->{trace} = 1;
406             # $self->{trace} = $trace if $trace =~ m/^[012]$/;
407              
408             next OPTION_READ if $trace eq "";
409             if ($trace =~ m/^[012]$/) {
410             $self->{trace} = $trace;
411             }
412             else {
413             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
414             $self->{errorString} .= "$trace is an invalid value for option trace.";
415             $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
416             }
417             }
418             elsif (m/^cache::(.*)/i) {
419             my $cache = $1;
420              
421             # JM 12/4/03 (#3)
422             # $self->{doCache} = 1;
423             # $self->{doCache} = $cache if $cache =~ m/^[01]$/;
424              
425             next OPTION_READ if $cache eq "";
426             if ($cache =~ m/^[01]$/) {
427             $self->{doCache} = $cache;
428             }
429             else {
430             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
431             $self->{errorString} .= "$cache is an invalid value for option cache.";
432             $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
433             }
434             }
435             elsif (m/^maxCacheSize::(.*)/i) {
436             my $mcs = $1;
437              
438             # JM 12/4/03 (#3)
439             # $self->{maxCacheSize} = DEFAULT_CACHE;
440             # if ($mcs =~ /unlimited/i) {
441             # $self->{maxCacheSize} = UNLIMITED_CACHE;
442             # next;
443             # }
444             # $self->{maxCacheSize} = $mcs if defined ($mcs) and $mcs =~ m/^\d+$/;
445              
446             next OPTION_READ if $mcs eq "";
447             if ($mcs =~ m/^unlimited/i) {
448             $self->{maxCacheSize} = UNLIMITED_CACHE;
449             }
450             elsif ($mcs =~ m/^\d+$/) {
451             $self->{maxCacheSize} = $mcs + 0;
452             }
453             else {
454             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
455             $self->{errorString} .= "$mcs is an invalid value for option maxCacheSize.";
456             $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
457             }
458             }
459             # JM 1/26/04
460             # moved code for the rootNode option to PathFinder.pm
461             else {
462             OPTION_LOOP:
463             foreach my $option (keys %config_options) {
464             my $found = 0;
465             CLASS_LOOP:
466             foreach my $class (keys %{$config_options{$option}}) {
467             if ($self->isa ($class)
468             and defined $config_options{$option}->{$class}) {
469             $found = $class;
470             last CLASS_LOOP;
471             }
472             }
473             next OPTION_LOOP unless $found;
474            
475             if (not defined $config_options{$option}->{$found}) {
476             print STDERR "$option $class\n";
477             }
478             my ($required, $type, $dflt)= @{$config_options{$option}->{$found}};
479              
480              
481             if (m/^${option}::(.*)$/i) {
482             my $t = $1;
483             if ($t =~ m/^\s*$/) {
484             if ($required) {
485             #error
486             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
487             $self->{errorString} .= "Option $option has no value.";
488             $self->{error} .= $self->{error} > 1 ? $self->{error} : 1;
489             }
490             else {
491             # do nothing
492             $self->{$option} = $dflt
493             }
494             }
495             else {
496             if ($type eq 'i') {
497             # JM 12/4/03 (#3)
498             # $self->{$option} = int ($t);
499             if ($t =~ m/^\d+$/) {
500             $self->{$option} = $t + 0;
501             }
502             else {
503             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
504             $self->{errorString} .=
505             "$t is an invalid value for option $option.";
506             $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
507             }
508             }
509             elsif ($type eq 'f') {
510             # JM 12/4/03 (#3)
511              
512             # check if this is a float
513             if ($t =~ /^[+-]?(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?$/) {
514             $self->{$option} = $t + 0.0;
515             }
516             else {
517             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
518             $self->{errorString} .=
519             "$t is an invalid value for option $option.";
520             $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
521             }
522             }
523             elsif ($type eq 'p') {
524             if (-e $t) {
525             $self->{$option} = $t;
526             }
527             else {
528             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
529             $self->{errorString} .=
530             "$t is not a valid filename for option $option.";
531             $self->{error} = ($self->{error} > 1) ? $self->{error} : 1;
532             }
533             }
534             elsif ($type eq 's') {
535             $self->{$option} = $t;
536             }
537             else {
538             $self->{errorString} .= "\nWarning (${class}::configure()) - ";
539             $self->{errorString} .=
540             "Unknown/invalid option type $type.\n";
541             }
542             }
543             next OPTION_READ;
544             }
545             }
546             # error
547             s/::.*//;
548             my $class = ref $self || $self;
549             $self->{errorString} .="\nWarning (${class}::configure()) - ";
550             $self->{errorString} .= "Ignoring unrecognized option '$_'.";
551             $self->{error} = $self->{error} > 1 ? $self->{error} : 1;
552             }
553             }
554             close CF;
555             return 1;
556             }
557              
558             =item $obj->getTraceString(Z<>)
559              
560             Returns the current trace string and resets the trace string to empty. If
561             tracing is turned off, then an empty string will always be returned.
562              
563             =cut
564              
565             sub getTraceString {
566             my $self = shift;
567              
568             return '' unless $self->{trace} and defined $self->{traceString};
569             my $str = $self->{traceString};
570             $self->{traceString} = '';
571             $str =~ s/\n{2,}$//;
572             return $str;
573             }
574              
575             =item $obj->getError(Z<>)
576              
577             Checks to see if any errors have occurred.
578             Returns a list of the form S<($level, $string)>. If $level is 0, then
579             no errors have occurred; if $level is non-zero, then an error has occurred.
580             A value of 1 is considered a warning, and a value of 2 is considered an
581             error. If $level is non-zero, then $string will have a (hopefully)
582             meaningful error message.
583              
584             =cut
585              
586             sub getError {
587             my $self = shift;
588             my $error = $self->{error};
589             my $errorString = $self->{errorString};
590             $self->{error} = 0;
591             $self->{errorString} = "";
592             $errorString =~ s/^[\r\n\t ]+//;
593             return ($error, $errorString);
594             }
595              
596              
597             =item $obj->traceOptions(Z<>)
598              
599             Prints module-specific options to the trace string. Any module that
600             adds configuration options via addConfigOption should override this
601             method.
602              
603             Options should be printed out using the following format:
604              
605             $self->{traceString} .= "option_name :: $option_value\n"
606              
607             Note that the option name is separated from its current value by a
608             space, two colons, and another space. The string should be terminated
609             by a newline.
610              
611             Since multiple modules may be overriding this method, any module
612             that overrides this method should insure that the superclass'
613             method gets called as well. You do this by putting this line at
614             the end of your method:
615              
616             $self->SUPER::traceOptions();
617              
618             returns: nothing
619              
620             =cut
621              
622             # JM 12/5/03 (#1)
623             sub traceOptions {
624             # nothing to do here, this is a just a placeholder
625             # subclasses should override this to print all config options to
626             # the traceString
627             }
628              
629              
630             =item $obj->parseWps($synset1, $synset2)
631              
632             parameters: synset1, synset2
633              
634             returns: a reference to an array [$word1, $pos1, $sense1, $offset1, $word2,
635             $pos2, $sense2, $offset2] or undef
636              
637             This method checks the format of the two input synsets by calling
638             validateSynset() for each synset.
639              
640             If the synsets are in wps format, a reference to an array will be returned.
641             This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2,
642             $sense2, $offset2] where $word1 is the word part of $wps1, $pos1, is the
643             part of speech of $wps1, $sense1 is the sense from $wps. $offset1 is the
644             offset for $wps1.
645              
646             If an error occurs (such as a synset being poorly-formed), then undef
647             is returned, the error level is set to non-zero, and an error message is
648             appended to the error string.
649              
650             =cut
651              
652             sub parseWps
653             {
654             my $self = shift;
655             my $wps1 = shift;
656             my $wps2 = shift;
657              
658             my $class = ref $self || $self;
659              
660             # Undefined input cannot go unpunished.
661             unless (defined $wps1 and defined $wps2 and length $wps1 and length $wps2) {
662             $self->{errorString} .= "\nWarning (${class}::parseWps()) - ";
663             $self->{errorString} .= "Variable for input synset ".(length($wps1) ? 2 : 1)." undefined.";
664             $self->{error} = ($self->{error} < 1) ? 1 : $self->{error};
665             return undef;
666             }
667              
668             my ($word1, $pos1, $sense1, $offset1) = $self->validateSynset ($wps1);
669             my ($word2, $pos2, $sense2, $offset2) = $self->validateSynset ($wps2);
670              
671             # Check to see if validation of synsets succeeded, if not, then
672             # bail out (error message already set by validateSynset).
673             unless (defined $word1 and defined $pos1 and defined $sense1
674             and defined $word2 and defined $pos2 and defined $sense2) {
675             return undef;
676             }
677              
678             return [$word1, $pos1, $sense1, $offset1, $word2, $pos2, $sense2, $offset2];
679             }
680              
681              
682             =item $obj->validateSynset($synset)
683              
684             parameter: synset
685              
686             returns: a list or undef on error
687              
688             synset is a string in word#pos#sense format
689              
690             This method does the following:
691              
692             =over
693              
694             =item 1.
695              
696             Verifies that the synset is well-formed (i.e., that it consists of three
697             parts separated by #s, the pos is one of {n, v, a, r} and that sense
698             is a natural number). A synset that matches the pattern '[^\#]+\#[nvar]\#\d+'
699             is considered well-formed.
700              
701             =item 2.
702              
703             Checks if the synset exists by trying to find the offset for the synset
704              
705             =back
706              
707             If any of these tests fails, then the error level is set to non-zero, a
708             message is appended to the error string, and undef is returned.
709              
710             If the synset is well-formed and exists, then a list is returned that
711             has the format ($word, $pos, $sense, $offset).
712              
713             =cut
714              
715             sub validateSynset
716             {
717             my $self = shift;
718             my $synset = shift;
719             my $class = ref $self || $self;
720              
721             # check to see that synset is in w#p#s format
722              
723             unless (defined ($synset) and length ($synset)) {
724             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
725             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
726             $self->{errorString} .= "Variable representing synset is undefined (or an empty string).";
727             return undef;
728             }
729              
730             my ($word, $pos, $sense) = split (/\#/, $synset);
731              
732             unless (defined $word) {
733             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
734             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
735             $self->{errorString} .= "Invalid synset ($synset): word undefined.";
736             return undef;
737             }
738              
739             unless (defined $pos) {
740             no strict 'vars';
741             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
742             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
743             $self->{errorString} .= "Invalid synset ($synset): part of speech undefined.";
744             return undef;
745             }
746              
747             unless (defined $sense) {
748             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
749             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
750             $self->{errorString} .= "Invalid synset ($synset): sense number undefined.";
751             return undef;
752             }
753              
754             # check to make sure the word, pos, and sense are well-formed
755             if ($word !~ /^[^\#]+$/) {
756             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
757             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
758             $self->{errorString} .= "$synset has a poorly-formed word ($word).";
759             return undef;
760             }
761             elsif ($pos !~ /^[nvar]$/) {
762             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
763             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
764             $self->{errorString} .= "$synset has a bad part of speech ($pos). Part of speech must be one of n, v, a, r.";
765             return undef;
766             }
767             elsif ($sense !~ /^\d+$/) {
768             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
769             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
770             $self->{errorString} .= "$synset has a bad sense number ($pos). Sense number must be a natural number.";
771             return undef;
772             }
773              
774             # check to see if synset exists
775             my $offset = $self->{wn}->offset ($synset);
776             unless ($offset) {
777             $self->{error} = $self->{error} < 1 ? 1 : $self->{error};
778             $self->{errorString} .= "\nWarning (${class}::validateSynset()) - ";
779             $self->{errorString} .= "$synset not found in WordNet.";
780             return undef;
781             }
782              
783             return ($word, $pos, $sense, $offset);
784             }
785              
786             =item $obj->getRelatedness($synset1, $synset2)
787              
788             parameters: synset1, synset2
789              
790             returns: a relatedness score
791              
792             This is a virtual method. It must be overridden by a module that
793             is derived from this class. This method takes two synsets and
794             returns a numeric value as their score of relatedness.
795              
796             =cut
797              
798             sub getRelatedness {
799             my $self = shift;
800             my $class = ref $self || $self;
801             $self->{errorString} .= "\nError (${class}::getRelatedness()) - ";
802             $self->{errorString} .= "This is a virtual method provided by ";
803             $self->{errorString} .= __PACKAGE__ . " that must be overridden.";
804             $self->{error} = 2;
805             return undef;
806             }
807              
808             # Subroutine that takes as input an array of offsets
809             # or offsets(POS) and for each prints to traceString the
810             # WORD#POS#(/)
811             # INPUT PARAMS : $pos .. Part of speech
812             # : ($offestpos1, $offsetpos2, ...) .. Array of offsetPOS's
813             # or offests
814             # RETURN VALUES : none.
815             sub _printSet
816             {
817             use Carp;
818             carp "This method is deprecated; use printSet instead";
819             my $self = shift;
820             my $pos = shift;
821             my $wn = $self->{wn};
822             my @offsets = @_;
823             my $wps;
824             my $opstr = "";
825              
826             foreach my $offset (@offsets) {
827             $offset =~ m/(\d+)([a-z])?/;
828             $offset = $1;
829             if($offset) {
830             $wps = $wn->getSense($offset, ($2 ? $2 : $pos));
831             }
832             else {
833             $wps = "*Root*\#". ($2 ? $2 : $pos) ."\#1";
834             }
835             $wps =~ s/ +/_/g;
836             if($self->{trace} == 2 && defined $offset && $offset != 0) {
837             $wps =~ s/\#[0-9]*$/\#$offset/;
838             }
839             $opstr .= "$wps ";
840             }
841             $opstr =~ s/\s+$//;
842             $self->{traceString} .= $opstr if($self->{trace});
843             }
844              
845              
846             =item $obj->printSet ($pos, $mode, @synsets)
847              
848             If tracing is turned on, prints the contents of @synsets to the trace string.
849             The contents of @synsets can be either wps strings or offsets. If they
850             are wps strings, then $mode must be the string 'wps'; if they are offsets,
851             then the mode must be 'offset'. Please don't try to mix wps and offsets.
852              
853             Returns the string that was appended to the trace string.
854              
855             =cut
856              
857             sub printSet
858             {
859             my $self = shift;
860             my $pos = shift;
861             my $mode = shift;
862             my @synsets = @_;
863              
864             my $opstr = '';
865             my $wn = $self->{wn};
866             my $wps;
867              
868             if ($mode eq 'offset') {
869             foreach my $offset (@synsets) {
870             $offset =~ m/^(\d+)([a-z])?/;
871             $offset = $1;
872             if ($offset) {
873             $wps = $wn->getSense ($offset, (defined $2 ? $2 : $pos));
874             }
875             else {
876             $wps = "*Root*\#" . (defined $2 ? $2 : $pos) . "\#1";
877             }
878             $wps =~ tr/ /_/;
879             if ($self->{trace} == 2 && defined $offset) {
880             $wps =~ s/\#[0-9]+$/\#$offset/;
881             }
882             $opstr .= "$wps ";
883             }
884             }
885             elsif ($mode eq 'wps') {
886             WPS:
887             foreach my $wps (@synsets) {
888             unless ($self->{trace} == 2) {
889             $opstr .= "$wps ";
890             next WPS;
891             }
892             my $offset = scalar ($wps =~ /\*Root\*/i) ? 0 : $wn->offset ($wps);
893             my ($word, $p) = $wps =~ /^(\S+)\#([nvar])\#\d+$/;
894             $opstr .= "$word#$p#$offset ";
895             }
896             }
897             $opstr =~ s/\s+$//;
898             $self->{traceString} .= $opstr if $self->{trace};
899             return $opstr;
900             }
901              
902              
903             # you should only call this if $self->{doCache} is true
904             # nothing bad will happen if you call anyways, but it will slow things down
905             #
906             # NEW! You can specify whether or not relatedness is symmetric: if
907             # relatedness (c1, c2) = relatedness (c2, c1), then relatedness is symmetric.
908              
909             =item $obj->fetchFromCache($wps1, $wps2, $non_symmetric)
910              
911             Looks for the relatedness value of ($wps1, $wps2) in the cache. If
912             $non_symmetric is false (or isn't specified), then the cache is searched
913             for ($wps2, $wps1) if ($wps1, $wps2) isn't found.
914              
915             Returns: a relatedness value or undef if none found in the cache.
916              
917             =cut
918              
919             sub fetchFromCache
920             {
921             my $self = shift;
922             my ($wps1, $wps2, $non_symmetric) = @_;
923              
924             $self->{doCache} or return undef;
925              
926             $non_symmetric = 0 unless defined $non_symmetric;
927              
928             if (defined $self->{simCache}->{"${wps1}::$wps2"}) {
929             if ($self->{traceCache}->{"${wps1}::$wps2"}) {
930             $self->{traceString} .= $self->{traceCache}->{"${wps1}::$wps2"};
931             }
932             return $self->{simCache}->{"${wps1}::$wps2"};
933             }
934             elsif (!$non_symmetric and defined $self->{simCache}->{"${wps2}::$wps1"}) {
935             if ($self->{traceCache}->{"${wps2}::$wps1"}) {
936             $self->{traceString} .= $self->{traceCache}->{"${wps2}::$wps1"};
937             }
938             return $self->{simCache}->{"${wps2}::$wps1"};
939             }
940             return undef;
941             }
942              
943             =item $obj->storeToCache ($wps1, $wps2, $score)
944              
945             Stores the relatedness value, $score, of ($wps1, $wps2) to the cache.
946              
947             Returns: nothing
948              
949             =cut
950              
951             sub storeToCache
952             {
953             my $self = shift;
954             my ($wps1, $wps2, $score) = @_;
955              
956             $self->{doCache} or return;
957              
958             $self->{simCache}->{"${wps1}::$wps2"} = $score;
959             if ($self->{trace}) {
960             $self->{traceCache}->{"${wps1}::$wps2"} = $self->{traceString}
961             }
962             push (@{$self->{cacheQ}}, "${wps1}::$wps2");
963             if (($self->{maxCacheSize} >= 0)
964             and ($self->{maxCacheSize} != UNLIMITED_CACHE)) {
965             while (scalar (@{$self->{cacheQ}}) > $self->{maxCacheSize}) {
966             my $delItem = shift(@{$self->{cacheQ}});
967             delete $self->{simCache}->{$delItem};
968             delete $self->{traceCache}->{$delItem};
969             }
970             }
971             }
972              
973              
974             1;
975              
976             __END__