| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# WordNet::Similarity::PathFinder version 2.04 |
|
2
|
|
|
|
|
|
|
# (Last updated $Id: PathFinder.pm,v 1.39 2008/03/27 06:21:17 sidz1979 Exp $) |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Module containing path-finding code for the various measures of semantic |
|
5
|
|
|
|
|
|
|
# relatedness. |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# Copyright (c) 2005, |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# Ted Pedersen, University of Minnesota Duluth |
|
10
|
|
|
|
|
|
|
# tpederse at d.umn.edu |
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# Jason Michelizzi, Univeristy of Minnesota Duluth |
|
13
|
|
|
|
|
|
|
# mich0212 at d.umn.edu |
|
14
|
|
|
|
|
|
|
# |
|
15
|
|
|
|
|
|
|
# Siddharth Patwardhan, University of Utah, Salt Lake City |
|
16
|
|
|
|
|
|
|
# sidd at cs.utah.edu |
|
17
|
|
|
|
|
|
|
# |
|
18
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
|
19
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
|
20
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
|
21
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
|
22
|
|
|
|
|
|
|
# |
|
23
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
|
24
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
25
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
26
|
|
|
|
|
|
|
# GNU General Public License for more details. |
|
27
|
|
|
|
|
|
|
# |
|
28
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
|
29
|
|
|
|
|
|
|
# along with this program; if not, write to |
|
30
|
|
|
|
|
|
|
# |
|
31
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
|
32
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
|
33
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
|
34
|
|
|
|
|
|
|
# |
|
35
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
package WordNet::Similarity::PathFinder; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 NAME |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
WordNet::Similarity::PathFinder - module to implement path finding methods |
|
42
|
|
|
|
|
|
|
(by node counting) for WordNet::Similarity measures of semantic relatedness |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
use WordNet::QueryData; |
|
47
|
|
|
|
|
|
|
my $wn = WordNet::QueryData->new; |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
use WordNet::Similarity::PathFinder; |
|
50
|
|
|
|
|
|
|
my $obj = WordNet::Similarity::PathFinder->new ($wn); |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my $wps1 = 'winston_churchill#n#1'; |
|
53
|
|
|
|
|
|
|
my $wps2 = 'england#n#1'; |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# parseWps returns reference to an array that contains |
|
56
|
|
|
|
|
|
|
# word1 pos1 sense1 offset1 word2 pos2 sense2 offset2 |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my $result = $obj->parseWps($wps1, $wps2); |
|
59
|
|
|
|
|
|
|
print "@$result\n"; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# path is a reference to an array that contains the path between |
|
62
|
|
|
|
|
|
|
# wps1 and wps2 expressed as a series of wps values |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my @paths = $obj->getShortestPath($wps1, $wps2, 'n', 'wps'); |
|
65
|
|
|
|
|
|
|
my ($length, $path) = @{shift @paths}; |
|
66
|
|
|
|
|
|
|
defined $path or die "No path between synsets"; |
|
67
|
|
|
|
|
|
|
print "shortest path between $wps1 and $wps2 is $length edges long\n"; |
|
68
|
|
|
|
|
|
|
print "@$path\n"; |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $offset1 = $wn -> offset($wps1); |
|
71
|
|
|
|
|
|
|
my $offset2 = $wn -> offset($wps2); |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# path is a reference to an array that contains the path between |
|
74
|
|
|
|
|
|
|
# offset1 and offset2 expressed as a series of offset values |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my @paths = $obj->getShortestPath($offset1, $offset2, 'n', 'offset'); |
|
77
|
|
|
|
|
|
|
my ($length, $path) = @{shift @paths}; |
|
78
|
|
|
|
|
|
|
defined $path or die "No path between synsets"; |
|
79
|
|
|
|
|
|
|
print "shortest path between $offset1 and $offset2 is $length edges long\n"; |
|
80
|
|
|
|
|
|
|
print "@$path\n"; |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head2 Introduction |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
This class is derived from (i.e., is a sub-class of) WordNet::Similarity. |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
The methods in this module are useful for finding paths between concepts |
|
89
|
|
|
|
|
|
|
in WordNet's 'is-a' taxonomies. Concept A is-a concept B if, and only if, |
|
90
|
|
|
|
|
|
|
B is a hypernym of A or A is in the hypernym tree of B. N.B., only nouns |
|
91
|
|
|
|
|
|
|
and verbs have hypernyms. |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
The methods that find path lengths (such as C and |
|
94
|
|
|
|
|
|
|
C compute the lengths using node-counting not edge-counting. |
|
95
|
|
|
|
|
|
|
In general, the length of a path using node-counting will always be one |
|
96
|
|
|
|
|
|
|
more than the length using edge-counting. For example, if concept A |
|
97
|
|
|
|
|
|
|
is a hyponym of concept B, then the path length between A and B using |
|
98
|
|
|
|
|
|
|
node-counting is 2, but the length using edge-counting is 1. Likewise, the |
|
99
|
|
|
|
|
|
|
path between A and A is 1 using node-counting and 0 using edge-counting. |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head2 Methods |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
This module inherits all the methods of WordNet::Similarity. Additionally, |
|
104
|
|
|
|
|
|
|
the following methods are also defined. |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head3 Public methods |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=over |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=cut |
|
111
|
|
|
|
|
|
|
|
|
112
|
7
|
|
|
7
|
|
46679
|
use strict; |
|
|
7
|
|
|
|
|
16
|
|
|
|
7
|
|
|
|
|
150
|
|
|
113
|
7
|
|
|
7
|
|
31
|
use warnings; |
|
|
7
|
|
|
|
|
14
|
|
|
|
7
|
|
|
|
|
160
|
|
|
114
|
7
|
|
|
7
|
|
3513
|
use WordNet::Similarity; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
use File::Spec; |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
our @ISA = qw/WordNet::Similarity/; |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
our $VERSION = '2.04'; |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
WordNet::Similarity::addConfigOption ('rootNode', 0, 'i', 1); |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item $measure->setPosList(Z<>) |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Specifies the parts of speech that measures derived from this module |
|
126
|
|
|
|
|
|
|
support (namely, nouns and verbs). |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
parameters: none |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
returns: true |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=cut |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub setPosList |
|
135
|
|
|
|
|
|
|
{ |
|
136
|
|
|
|
|
|
|
my $self = shift; |
|
137
|
|
|
|
|
|
|
$self->{n} = 1; |
|
138
|
|
|
|
|
|
|
$self->{v} = 1; |
|
139
|
|
|
|
|
|
|
return 1; |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item $self->traceOptions(Z<>) |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Overrides method of same name in WordNet::Similarity. Prints module-specific |
|
146
|
|
|
|
|
|
|
configuration options to the trace string (if tracing is on). PathFinder |
|
147
|
|
|
|
|
|
|
supports one module specific option: rootNode. |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Parameters: none |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
returns: nothing |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub traceOptions |
|
156
|
|
|
|
|
|
|
{ |
|
157
|
|
|
|
|
|
|
my $self = shift; |
|
158
|
|
|
|
|
|
|
$self->{traceString} .= "root node :: $self->{rootNode}\n"; |
|
159
|
|
|
|
|
|
|
$self->SUPER::traceOptions(); |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=item $measure->parseWps($synset1, $synset2) |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
parameters: synset1, synset2 -- two synsets in wps format |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
returns: a reference to an array, WordNet::Similarity::UNRELATED, or undef |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Overrides the parseWps() method in WordNet::Similarity in order to run |
|
170
|
|
|
|
|
|
|
additional checks, but calls WordNet::Similarity::parseWps() to get |
|
171
|
|
|
|
|
|
|
those checks accomplished as well. Thus, this method does everything |
|
172
|
|
|
|
|
|
|
that WordNet::Similarity::parseWps does. |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=over |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=item quote from WordNet::Similarity::parseWps: |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
This method checks the format of the two input synsets by calling |
|
179
|
|
|
|
|
|
|
validateSynset() for each synset. |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
If the synsets are in wps format, a reference to an array will be returned. |
|
182
|
|
|
|
|
|
|
This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2, |
|
183
|
|
|
|
|
|
|
$sense2, $offset2] where $word1 is the word part of $wps1, $pos1, is the |
|
184
|
|
|
|
|
|
|
part of speech of $wps1, $sense1 is the sense from $wps. $offset1 is the |
|
185
|
|
|
|
|
|
|
offset for $wps1. |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
If an error occurs (such as a synset being poorly-formed), then undef |
|
188
|
|
|
|
|
|
|
is returned, the error level is set to non-zero, and an error message is |
|
189
|
|
|
|
|
|
|
appended to the error string. |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=back |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
In addition, if the two synsets are from different parts of speech, then |
|
194
|
|
|
|
|
|
|
WordNet::Similarity::UNRELATED is returned, the error level is set to 1, and |
|
195
|
|
|
|
|
|
|
a message is appended to the error string. |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
If either synset is not a noun or a verb, then the error level |
|
198
|
|
|
|
|
|
|
is set to 1, a message is appended to the error string, and undef |
|
199
|
|
|
|
|
|
|
is returned. |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
If the synsets are in wps format, a reference to an array will be returned. |
|
202
|
|
|
|
|
|
|
This array has the form [$word1, $pos1, $sense1, $offset1, $word2, $pos2, |
|
203
|
|
|
|
|
|
|
$sense2, $offset2]. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=cut |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub parseWps |
|
208
|
|
|
|
|
|
|
{ |
|
209
|
|
|
|
|
|
|
my $self = shift; |
|
210
|
|
|
|
|
|
|
my $ret = $self->SUPER::parseWps (@_); |
|
211
|
|
|
|
|
|
|
my $class = ref $self || $self; |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
ref $ret or return $ret; |
|
214
|
|
|
|
|
|
|
my ($w1, $pos1, $s1, $off1, $w2, $pos2, $s2, $off2) = @{$ret}; |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# check to make sure both input words are of the same part of speech |
|
217
|
|
|
|
|
|
|
if ($pos1 ne $pos2) { |
|
218
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
219
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::parseWps()) - "; |
|
220
|
|
|
|
|
|
|
$self->{errorString} .= |
|
221
|
|
|
|
|
|
|
"$w1#$pos1 and $w2#$pos2 belong to different parts of speech."; |
|
222
|
|
|
|
|
|
|
if ($self->{trace}) { |
|
223
|
|
|
|
|
|
|
$self->{traceString} .= "\n"; |
|
224
|
|
|
|
|
|
|
$self->printSet ($pos1, 'wps', "$w1#$pos1#$s1"); |
|
225
|
|
|
|
|
|
|
$self->{traceString} .= " and "; |
|
226
|
|
|
|
|
|
|
$self->printSet ($pos2, 'wps', "$w2#$pos2#$s2"); |
|
227
|
|
|
|
|
|
|
$self->{traceString} .= " belong to different parts of speech."; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
return $self->UNRELATED; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# check to make sure that the pos is a noun or verb |
|
233
|
|
|
|
|
|
|
if (index ("nv", $pos1) < $[) { |
|
234
|
|
|
|
|
|
|
if ($self->{trace}) { |
|
235
|
|
|
|
|
|
|
$self->{traceString} .= |
|
236
|
|
|
|
|
|
|
"Only verbs and nouns have hypernym trees ($w1#$pos1, $w2#$pos2).\n"; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
239
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::parseWps()) - "; |
|
240
|
|
|
|
|
|
|
$self->{errorString} .= |
|
241
|
|
|
|
|
|
|
"Only verbs and nouns have hypernym trees ($w1#$pos1, $w2#$pos2)."; |
|
242
|
|
|
|
|
|
|
return undef; |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
return $ret; |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item $measure->getShortestPath($synset1, $synset2, $pos, $mode) |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Given two input synsets, returns the shortest path between the two synsets. |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
Parameters: two synsets, a part-of-speech, and a mode indicator |
|
254
|
|
|
|
|
|
|
(i.e., the string 'offset' or 'wps'). If the mode is 'offset', then the |
|
255
|
|
|
|
|
|
|
synsets should be WordNet offsets. If the mode is 'wps', then the synsets |
|
256
|
|
|
|
|
|
|
should be in word#pos#sense format. |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Returns: a list of references to arrays. Each array has the form |
|
259
|
|
|
|
|
|
|
C<($path_length, $path_ref)> where $path_ref is |
|
260
|
|
|
|
|
|
|
a reference to an array whose elements are the synsets along the shortest |
|
261
|
|
|
|
|
|
|
path between the two input synsets. There will be as many array references |
|
262
|
|
|
|
|
|
|
returned as there are shortest paths between the synsets. That is, there |
|
263
|
|
|
|
|
|
|
will be no arrays returned if there is no path between the synsets, and there |
|
264
|
|
|
|
|
|
|
will be at least one array returned if there is a path between the synsets. |
|
265
|
|
|
|
|
|
|
If there are multiple paths tied for being shortest in length, then all |
|
266
|
|
|
|
|
|
|
those paths are returned (hence, this is why multiple array references |
|
267
|
|
|
|
|
|
|
can be returned). |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Upon error, returns undef, sets the error level to non-zero, and appends |
|
270
|
|
|
|
|
|
|
a message to the error string. |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub getShortestPath |
|
275
|
|
|
|
|
|
|
{ |
|
276
|
|
|
|
|
|
|
my $self = shift; |
|
277
|
|
|
|
|
|
|
my $synset1 = shift; |
|
278
|
|
|
|
|
|
|
my $synset2 = shift; |
|
279
|
|
|
|
|
|
|
my $pos = shift; |
|
280
|
|
|
|
|
|
|
my $mode = shift; |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
my $class = ref $self || $self; |
|
283
|
|
|
|
|
|
|
my $wn = $self->{wn}; |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# JM 2/9/04 - we do this in validateSynset() now |
|
286
|
|
|
|
|
|
|
#if ($mode eq 'wps') { |
|
287
|
|
|
|
|
|
|
# # this prevents problems when the two input words are different word |
|
288
|
|
|
|
|
|
|
# # senses from the same synset (e.g., car#n#1 and auto#n#1) |
|
289
|
|
|
|
|
|
|
# ($synset1) = $wn->querySense ($synset1, "syns"); |
|
290
|
|
|
|
|
|
|
# ($synset2) = $wn->querySense ($synset2, "syns"); |
|
291
|
|
|
|
|
|
|
#} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode); |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
# check to see if any paths were found; if none were found, then |
|
296
|
|
|
|
|
|
|
# $paths[0] will be undefined |
|
297
|
|
|
|
|
|
|
unless (defined $paths[0]) { |
|
298
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
299
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getShortestPath()) - "; |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
my ($wps1, $wps2) = ($synset1, $synset2); |
|
302
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
|
303
|
|
|
|
|
|
|
$wps1 = $wn->getSense ($synset1, $pos); |
|
304
|
|
|
|
|
|
|
$wps2 = $wn->getSense ($synset2, $pos); |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
$self->{errorString} .= "No path between synsets $wps1 and $wps2 found."; |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
if ($self->{trace}) { |
|
309
|
|
|
|
|
|
|
$self->{traceString} .= "\nNo path between synsets "; |
|
310
|
|
|
|
|
|
|
$self->printSet ($pos, 'wps', $wps1); |
|
311
|
|
|
|
|
|
|
$self->{traceString} .= " and "; |
|
312
|
|
|
|
|
|
|
$self->printSet ($pos, 'wps', $wps2); |
|
313
|
|
|
|
|
|
|
$self->{traceString} .= " found."; |
|
314
|
|
|
|
|
|
|
} |
|
315
|
|
|
|
|
|
|
return undef; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
my $best_length = $paths[0]->[1]; |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
my @return = ([$paths[0]->[1], $paths[0]->[2]]); |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
foreach (1..$#paths) { |
|
323
|
|
|
|
|
|
|
last if $paths[$_]->[1] > $best_length; |
|
324
|
|
|
|
|
|
|
push @return, [$paths[$_]->[1], $paths[$_]->[2]]; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
#my $length = $paths[0]->[1]; |
|
328
|
|
|
|
|
|
|
#my $path = $paths[0]->[2]; |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
if ($self->{trace}) { |
|
331
|
|
|
|
|
|
|
for (@return) { |
|
332
|
|
|
|
|
|
|
$self->{traceString} .= "\nShortest path: "; |
|
333
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, @{$_->[1]}); |
|
334
|
|
|
|
|
|
|
$self->{traceString} .= "\nPath length = " . $_->[0]; |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
return @return; |
|
338
|
|
|
|
|
|
|
} |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item $measure->getAllPaths($synset1, $synset2, $pos, $mode) |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Given two input synsets, returns all the paths between the two synsets. |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
Parameters: a reference to the object, two synsets, a part-of-speech, and |
|
346
|
|
|
|
|
|
|
a mode indicator (the string 'offset' or 'wps'). |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
If the mode is 'offset', then the synsets should be WordNet offsets. If the |
|
349
|
|
|
|
|
|
|
mode is 'wps', then they should be strings in word#pos#sense format. |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Returns: A list of all paths, sorted by path length in ascending order. The |
|
352
|
|
|
|
|
|
|
format for each item in the list is a reference to an array that has the |
|
353
|
|
|
|
|
|
|
format: [$top, $length, [@synsets_list]] where @synset_list is a list |
|
354
|
|
|
|
|
|
|
of synsets along the path (including the two input synsets) |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
Returns undef on error. |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub getAllPaths |
|
361
|
|
|
|
|
|
|
{ |
|
362
|
|
|
|
|
|
|
my $self = shift; |
|
363
|
|
|
|
|
|
|
my $class = ref $self || $self; |
|
364
|
|
|
|
|
|
|
my $synset1 = shift; |
|
365
|
|
|
|
|
|
|
my $synset2 = shift; |
|
366
|
|
|
|
|
|
|
my $pos = shift; |
|
367
|
|
|
|
|
|
|
my $mode = shift; |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
if (($mode ne 'offset') && ($mode ne 'wps')) { |
|
370
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
371
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getAllPaths()) - "; |
|
372
|
|
|
|
|
|
|
$self->{errorString} .= "Mode must be either 'offset' or 'wps'"; |
|
373
|
|
|
|
|
|
|
return undef; |
|
374
|
|
|
|
|
|
|
} |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
my @lTrees = $self->_getHypernymTrees ($synset1, $pos, $mode); |
|
377
|
|
|
|
|
|
|
my @rTrees = $self->_getHypernymTrees ($synset2, $pos, $mode); |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
# [trace] |
|
380
|
|
|
|
|
|
|
if($self->{trace}) { |
|
381
|
|
|
|
|
|
|
foreach my $lTree (@lTrees) { |
|
382
|
|
|
|
|
|
|
$self->{traceString} .= "HyperTree: "; |
|
383
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, @$lTree); |
|
384
|
|
|
|
|
|
|
$self->{traceString} .= "\n"; |
|
385
|
|
|
|
|
|
|
} |
|
386
|
|
|
|
|
|
|
foreach my $rTree (@rTrees) { |
|
387
|
|
|
|
|
|
|
$self->{traceString} .= "HyperTree: "; |
|
388
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, @$rTree); |
|
389
|
|
|
|
|
|
|
$self->{traceString} .= "\n"; |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
} |
|
392
|
|
|
|
|
|
|
# [/trace] |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# Find the length of each path in these trees. |
|
395
|
|
|
|
|
|
|
my @return; |
|
396
|
|
|
|
|
|
|
# my $root = $mode eq 'offset' |
|
397
|
|
|
|
|
|
|
# ? 0 |
|
398
|
|
|
|
|
|
|
# : ($pos eq 'n') ? $self->ROOT_N : $self->ROOT_V; |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
LTREE: |
|
401
|
|
|
|
|
|
|
foreach my $lTree (@lTrees) { |
|
402
|
|
|
|
|
|
|
RTREE: |
|
403
|
|
|
|
|
|
|
foreach my $rTree (@rTrees) { |
|
404
|
|
|
|
|
|
|
my $subsumer; |
|
405
|
|
|
|
|
|
|
$subsumer = $self->_getSubsumerFromTrees ($lTree, $rTree, $mode); |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
next RTREE unless defined $subsumer; |
|
408
|
|
|
|
|
|
|
#next RTREE if ($subsumer eq $root) and !$self->{rootNode}; |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
my $lCount = 0; |
|
411
|
|
|
|
|
|
|
my @lpath; |
|
412
|
|
|
|
|
|
|
foreach my $offset (reverse @{$lTree}) { |
|
413
|
|
|
|
|
|
|
$lCount++; |
|
414
|
|
|
|
|
|
|
last if($offset eq $subsumer); |
|
415
|
|
|
|
|
|
|
push @lpath, $offset; |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
my $rCount = 0; |
|
418
|
|
|
|
|
|
|
my @rpath; |
|
419
|
|
|
|
|
|
|
foreach my $offset (reverse @{$rTree}) { |
|
420
|
|
|
|
|
|
|
$rCount++; |
|
421
|
|
|
|
|
|
|
last if($offset eq $subsumer); |
|
422
|
|
|
|
|
|
|
unshift @rpath, $offset; |
|
423
|
|
|
|
|
|
|
} |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
my $path = [@lpath, $subsumer, @rpath]; |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
push @return, [$subsumer, $rCount + $lCount - 1, $path]; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
return sort {$a->[1] <=> $b->[1]} @return; |
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item $measure->validateSynset($synset) |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
parameters: synset -- a string in word#pos#sense format |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
returns: a list or undef on error |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
This method overrides the method of the same name in WordNet::Similarity |
|
442
|
|
|
|
|
|
|
to provide additional behavior but calls WordNet::Similarity::validateSynset |
|
443
|
|
|
|
|
|
|
to accomplish that method's behavior. Thus, this method does everything |
|
444
|
|
|
|
|
|
|
that WordNet::Similarity::validateSynset does. |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=over |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=item quote from WordNet::Similarity::validateSynset: |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
This method does the following: |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=over |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=item 1. |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Verifies that the synset is well-formed (i.e., that it consists of three |
|
457
|
|
|
|
|
|
|
parts separated by #s, the pos is one of {n, v, a, r} and that sense |
|
458
|
|
|
|
|
|
|
is a natural number). A synset that matches the pattern '[^\#]+\#[nvar]\#\d+' |
|
459
|
|
|
|
|
|
|
is considered well-formed. |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item 2. |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Checks if the synset exists by trying to find the offset for the synset |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=back |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=back |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This method, however, has a slightly different return value. Instead of |
|
470
|
|
|
|
|
|
|
merely breaking the synset into three parts, it returns the "safe" form |
|
471
|
|
|
|
|
|
|
of the synset. That is, if a synset has multiple word senses, this |
|
472
|
|
|
|
|
|
|
method returns the first word sense in that synset (this is so that |
|
473
|
|
|
|
|
|
|
other path-finding methods work properly). For example, if the input |
|
474
|
|
|
|
|
|
|
to this method is auto#n#1, the return value is ('car', 'n', 1, 2853224) |
|
475
|
|
|
|
|
|
|
since the sense 'car#n#1' is the first member of the synset to which |
|
476
|
|
|
|
|
|
|
'auto#n#1' belongs. |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
If any of these tests fails, then the error level is set to non-zero, a |
|
479
|
|
|
|
|
|
|
message is appended to the error string, and undef is returned. |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=cut |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
sub validateSynset |
|
484
|
|
|
|
|
|
|
{ |
|
485
|
|
|
|
|
|
|
my $self = shift; |
|
486
|
|
|
|
|
|
|
my $synset = shift; |
|
487
|
|
|
|
|
|
|
my ($word, $pos, $sense, $offset) = $self->SUPER::validateSynset ($synset); |
|
488
|
|
|
|
|
|
|
my $class = ref $self || $self; |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# check to see if previous call encountered an error: |
|
491
|
|
|
|
|
|
|
return undef if $self->{error}; |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
my @synset = $self->{wn}->querySense ($synset, "syns"); |
|
494
|
|
|
|
|
|
|
my $safewps = shift @synset; |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
unless (defined $safewps) { |
|
497
|
|
|
|
|
|
|
# safety check--we shouldn't ever get here. querySense shouldn't |
|
498
|
|
|
|
|
|
|
# return undef unless the input synset is bad, but we've already |
|
499
|
|
|
|
|
|
|
# checked that synset |
|
500
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
501
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
|
502
|
|
|
|
|
|
|
$self->{errorString} .= "No synset appears to exist for $synset."; |
|
503
|
|
|
|
|
|
|
return undef; |
|
504
|
|
|
|
|
|
|
} |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
unless ($safewps =~ /^([^\s\#]+)\#([nvar])\#(\d+)$/) { |
|
507
|
|
|
|
|
|
|
# we should never get here -- if QueryData doesn't return word senses |
|
508
|
|
|
|
|
|
|
# in the right format, then we're in a lot of trouble... nevertheless, |
|
509
|
|
|
|
|
|
|
# we check just to be sure |
|
510
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
511
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::validateSynset()) - "; |
|
512
|
|
|
|
|
|
|
$self->{errorString} .= "Internal error: $safewps is not well-formed. Has WordNet or WordNet::QueryData changed format?"; |
|
513
|
|
|
|
|
|
|
return undef; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
return ($1, $2, $3, $offset); |
|
517
|
|
|
|
|
|
|
} |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=back |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head3 Private methods |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=over |
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=item $measure->_getHypernymTrees($synset, $pos, $mode) |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
This method takes as input a synset and returns a list of references |
|
529
|
|
|
|
|
|
|
to arrays where these arrays are paths from the input synset to the |
|
530
|
|
|
|
|
|
|
top of the taxonomy (*Root*#[nv]#1 if the root node is on). |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Parameters: a synset, a part-of-speech, and a mode. |
|
533
|
|
|
|
|
|
|
The mode must be either the string 'wps' or 'offset'. If |
|
534
|
|
|
|
|
|
|
the mode is 'wps', then the synset must be in wps format; otherwise, it |
|
535
|
|
|
|
|
|
|
must be an offset. |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Returns: a list of references to arrays. These arrays are paths (hypernym |
|
538
|
|
|
|
|
|
|
trees). |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=cut |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# Suroutine that returns an array of hypernym trees, given the offset of |
|
543
|
|
|
|
|
|
|
# the synset. Each hypernym tree is an array of offsets. |
|
544
|
|
|
|
|
|
|
# INPUT PARAMS : $offset .. Offset of the synset. |
|
545
|
|
|
|
|
|
|
# : $pos .. Part of speech. |
|
546
|
|
|
|
|
|
|
# RETURN VALUES : (@tree1, @tree2, ...) .. an array of Hypernym trees (offsets) |
|
547
|
|
|
|
|
|
|
sub _getHypernymTrees |
|
548
|
|
|
|
|
|
|
{ |
|
549
|
|
|
|
|
|
|
my $self = shift; |
|
550
|
|
|
|
|
|
|
my $wn = $self->{wn}; |
|
551
|
|
|
|
|
|
|
my $synset = shift; |
|
552
|
|
|
|
|
|
|
my $pos = shift; |
|
553
|
|
|
|
|
|
|
my $mode = shift; |
|
554
|
|
|
|
|
|
|
my $curPath = shift; |
|
555
|
|
|
|
|
|
|
$curPath = {} if(!defined($curPath)); |
|
556
|
|
|
|
|
|
|
$curPath->{$synset} = 1; |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
my $wordForm = $synset; |
|
560
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
|
561
|
|
|
|
|
|
|
# check if the input synset is one of the imaginary root nodes |
|
562
|
|
|
|
|
|
|
if ($synset == 0) { |
|
563
|
|
|
|
|
|
|
return ([0]); |
|
564
|
|
|
|
|
|
|
} |
|
565
|
|
|
|
|
|
|
$wordForm = $wn->getSense($synset, $pos); |
|
566
|
|
|
|
|
|
|
} |
|
567
|
|
|
|
|
|
|
else { |
|
568
|
|
|
|
|
|
|
# check for root node |
|
569
|
|
|
|
|
|
|
if ($synset =~ /\*ROOT\*/i) { |
|
570
|
|
|
|
|
|
|
return ([$synset]); |
|
571
|
|
|
|
|
|
|
} |
|
572
|
|
|
|
|
|
|
} |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
my @hypernyms = $wn->querySense($wordForm, "hypes"); |
|
575
|
|
|
|
|
|
|
my @returnArray = (); |
|
576
|
|
|
|
|
|
|
if($#hypernyms < 0) { |
|
577
|
|
|
|
|
|
|
my @tmpArray = $synset; |
|
578
|
|
|
|
|
|
|
if ($self->{rootNode}) { |
|
579
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
|
580
|
|
|
|
|
|
|
unshift @tmpArray, 0; |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
else { |
|
583
|
|
|
|
|
|
|
unshift @tmpArray, ($pos eq 'n') ? $self->ROOT_N : $self->ROOT_V; |
|
584
|
|
|
|
|
|
|
} |
|
585
|
|
|
|
|
|
|
} |
|
586
|
|
|
|
|
|
|
push @returnArray, [@tmpArray]; |
|
587
|
|
|
|
|
|
|
} |
|
588
|
|
|
|
|
|
|
else { |
|
589
|
|
|
|
|
|
|
foreach my $hypernym (@hypernyms) { |
|
590
|
|
|
|
|
|
|
my $hypesynset = $mode eq 'offset' ? $wn->offset ($hypernym) : $hypernym; |
|
591
|
|
|
|
|
|
|
if(!defined($curPath->{$hypesynset})) |
|
592
|
|
|
|
|
|
|
{ |
|
593
|
|
|
|
|
|
|
my %localCopy = %{$curPath}; |
|
594
|
|
|
|
|
|
|
my @tmpArray = $self->_getHypernymTrees ($hypesynset, $pos, $mode, \%localCopy); |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
foreach my $element (@tmpArray) { |
|
597
|
|
|
|
|
|
|
push @$element, $synset; |
|
598
|
|
|
|
|
|
|
push @returnArray, [@$element]; |
|
599
|
|
|
|
|
|
|
} |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
if(scalar(@returnArray) <= 0) { |
|
602
|
|
|
|
|
|
|
my @tmpArray = $synset; |
|
603
|
|
|
|
|
|
|
if ($self->{rootNode}) { |
|
604
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
|
605
|
|
|
|
|
|
|
unshift @tmpArray, 0; |
|
606
|
|
|
|
|
|
|
} |
|
607
|
|
|
|
|
|
|
else { |
|
608
|
|
|
|
|
|
|
unshift @tmpArray, ($pos eq 'n') ? $self->ROOT_N : $self->ROOT_V; |
|
609
|
|
|
|
|
|
|
} |
|
610
|
|
|
|
|
|
|
} |
|
611
|
|
|
|
|
|
|
push @returnArray, [@tmpArray]; |
|
612
|
|
|
|
|
|
|
} |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
} |
|
615
|
|
|
|
|
|
|
return @returnArray; |
|
616
|
|
|
|
|
|
|
} |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=item getLCSbyPath($synset1, $synset2, $pos, $mode) |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Given two input synsets, finds the least common subsumer (LCS) of them. |
|
621
|
|
|
|
|
|
|
If there are multiple candidates for the LCS (due to multiple inheritance), |
|
622
|
|
|
|
|
|
|
the LCS that results in the shortest path between in input concepts is |
|
623
|
|
|
|
|
|
|
chosen. |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
Parameters: two synsets, a part of speech, and a mode. |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
Returns: a list of references to arrays where each array has the from |
|
628
|
|
|
|
|
|
|
C<($lcs, $pathlength)>. $pathlength is the length |
|
629
|
|
|
|
|
|
|
of the path between the two input concepts. There can be multiple LCSs |
|
630
|
|
|
|
|
|
|
returned if there are ties for the shortest path between the two synsets. |
|
631
|
|
|
|
|
|
|
Returns undef on error. |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=cut |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
sub getLCSbyPath |
|
636
|
|
|
|
|
|
|
{ |
|
637
|
|
|
|
|
|
|
my $self = shift; |
|
638
|
|
|
|
|
|
|
my $synset1 = shift; |
|
639
|
|
|
|
|
|
|
my $synset2 = shift; |
|
640
|
|
|
|
|
|
|
my $pos = shift; |
|
641
|
|
|
|
|
|
|
my $mode = shift; |
|
642
|
|
|
|
|
|
|
my $class = ref $self || $self; |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
my @paths = $self->getAllPaths ($synset1, $synset2, $pos, $mode); |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
# if no paths were found, $paths[0] should be undefined |
|
647
|
|
|
|
|
|
|
unless (defined $paths[0]) { |
|
648
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
649
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getLCSbyPath()) - "; |
|
650
|
|
|
|
|
|
|
$self->{errorString} .= "No LCS found."; |
|
651
|
|
|
|
|
|
|
return undef; |
|
652
|
|
|
|
|
|
|
} |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
if ($self->{trace}) { |
|
655
|
|
|
|
|
|
|
$self->{traceString} .= "Lowest Common Subsumer(s): "; |
|
656
|
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
my @return; |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
# put the best LCS(s) into @return; do some tracing at the same time. |
|
661
|
|
|
|
|
|
|
foreach my $pathref (@paths) { |
|
662
|
|
|
|
|
|
|
if ($self->{trace}) { |
|
663
|
|
|
|
|
|
|
# print path to trace string |
|
664
|
|
|
|
|
|
|
$self->printSet ($pos, $mode, $pathref->[0]); |
|
665
|
|
|
|
|
|
|
$self->{traceString} .= " (Length=".$pathref->[1].")\n"; |
|
666
|
|
|
|
|
|
|
} |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# push onto return array if this path length is tied for best |
|
669
|
|
|
|
|
|
|
if ($pathref->[1] <= $paths[0]->[1]) { |
|
670
|
|
|
|
|
|
|
push @return, [$pathref->[0], $pathref->[1]]; |
|
671
|
|
|
|
|
|
|
} |
|
672
|
|
|
|
|
|
|
} |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
if ($self->{trace}) { |
|
675
|
|
|
|
|
|
|
$self->{traceString} .= "\n\n"; |
|
676
|
|
|
|
|
|
|
} |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
return @return; |
|
679
|
|
|
|
|
|
|
} |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=item $measure->_getSubsumerFromTrees($treeref1, $treeref2, $mode) |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
This subroutine returns takes two trees as produced by getHypernymTrees |
|
685
|
|
|
|
|
|
|
and returns the most specific subsumer from them. |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Parameters: two references to arrays, and |
|
688
|
|
|
|
|
|
|
a string indicating mode ('wps' or 'offset'). |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
Returns: the subsumer or undef |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=cut |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
sub _getSubsumerFromTrees |
|
695
|
|
|
|
|
|
|
{ |
|
696
|
|
|
|
|
|
|
my $self = shift; |
|
697
|
|
|
|
|
|
|
my $array1 = shift; |
|
698
|
|
|
|
|
|
|
my $array2 = shift; |
|
699
|
|
|
|
|
|
|
my $mode = shift; |
|
700
|
|
|
|
|
|
|
my @tree1 = reverse @{$array1}; |
|
701
|
|
|
|
|
|
|
my @tree2 = reverse @{$array2}; |
|
702
|
|
|
|
|
|
|
my $class = ref $self || $self; |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
my $tmpString = " " . join (" ", @tree1) . " "; |
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
foreach my $element (@tree2) { |
|
707
|
|
|
|
|
|
|
my $pattern = ($mode eq 'offset') ? qr/ 0*$element / : qr/ \Q$element\E /; |
|
708
|
|
|
|
|
|
|
if ($tmpString =~ /$pattern/) { |
|
709
|
|
|
|
|
|
|
return $element; |
|
710
|
|
|
|
|
|
|
} |
|
711
|
|
|
|
|
|
|
} |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# no common subsumer found, check to see if we are using a root node |
|
714
|
|
|
|
|
|
|
return undef unless $self->{rootNode}; |
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
717
|
|
|
|
|
|
|
$self->{error} .= "\nWarning (${class}::getSubsumerFromTrees()) - "; |
|
718
|
|
|
|
|
|
|
$self->{errorString} .= "root node 'on' but no subsumer found."; |
|
719
|
|
|
|
|
|
|
return undef; |
|
720
|
|
|
|
|
|
|
} |
|
721
|
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=item getDepth() |
|
723
|
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
This method is non-functional and likely to be moved to a different module |
|
725
|
|
|
|
|
|
|
soon. |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=cut |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
sub getDepth |
|
730
|
|
|
|
|
|
|
{ |
|
731
|
|
|
|
|
|
|
use Carp; |
|
732
|
|
|
|
|
|
|
croak "This method is non-functional"; |
|
733
|
|
|
|
|
|
|
my $self = shift; |
|
734
|
|
|
|
|
|
|
my $synset = shift; |
|
735
|
|
|
|
|
|
|
my $pos = shift; |
|
736
|
|
|
|
|
|
|
my $mode = shift; |
|
737
|
|
|
|
|
|
|
my $class = ref $self || $self; |
|
738
|
|
|
|
|
|
|
my $offset; |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
if ($mode eq 'offset') { |
|
741
|
|
|
|
|
|
|
$offset = $synset; |
|
742
|
|
|
|
|
|
|
return 1 if $offset == 0; |
|
743
|
|
|
|
|
|
|
} |
|
744
|
|
|
|
|
|
|
elsif ($mode eq 'wps') { |
|
745
|
|
|
|
|
|
|
$offset = $self->{wn}->offset ($synset); |
|
746
|
|
|
|
|
|
|
return 1 if $synset =~ /^\*Root\*/i; |
|
747
|
|
|
|
|
|
|
} |
|
748
|
|
|
|
|
|
|
else { |
|
749
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
750
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getAllPaths()) - "; |
|
751
|
|
|
|
|
|
|
$self->{errorString} .= "Mode must be either 'offset' or 'wps'"; |
|
752
|
|
|
|
|
|
|
return undef; |
|
753
|
|
|
|
|
|
|
} |
|
754
|
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
my $depth = $self->{depths}->{$pos}->{$offset}; |
|
756
|
|
|
|
|
|
|
defined $depth and return $depth; |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
$self->{error} = $self->{error} < 1 ? 1 : $self->{error}; |
|
759
|
|
|
|
|
|
|
$self->{errorString} .= "\nWarning (${class}::getDepth) - "; |
|
760
|
|
|
|
|
|
|
$self->{errorString} .= "$synset appears to have undefined depth."; |
|
761
|
|
|
|
|
|
|
return undef; |
|
762
|
|
|
|
|
|
|
} |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
1; |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
__END__ |