line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*- perl -*- |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# WordNet::SenseKey.pm version 1.03 |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Given a WordNet file offset, return the corresponding sense key |
6
|
|
|
|
|
|
|
# Meant to be used with WordNet::Similarity, which does not normally |
7
|
|
|
|
|
|
|
# manipulate data using sense keys. |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Copyright (c) 2008 Linas Vepstas linasvepstas at gmail.com |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
12
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
13
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
14
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
17
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
18
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19
|
|
|
|
|
|
|
# GNU General Public License for more details. |
20
|
|
|
|
|
|
|
# |
21
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
22
|
|
|
|
|
|
|
# along with this program; if not, write to |
23
|
|
|
|
|
|
|
# |
24
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
25
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
26
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
27
|
|
|
|
|
|
|
# |
28
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
package WordNet::SenseKey; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 NAME |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
WordNet::SenseKey - convert WordNet sense keys to sense numbers, and v.v. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 SYNOPSIS |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use WordNet::QueryData; |
39
|
|
|
|
|
|
|
use WordNet::SenseKey; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $wn = WordNet::QueryData->new("/usr/share/wordnet"); |
42
|
|
|
|
|
|
|
my $sk = WordNet::SenseKey->new($wn); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $skey = $sk->get_sense_key("run#v#2"); |
45
|
|
|
|
|
|
|
print "Found the sense key $skey for run#v#2\n"; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $sense = $sk->get_sense_num($skey); |
48
|
|
|
|
|
|
|
print "Found sense $sense for key $skey\n"; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
my @synset = $sk->get_synset($skey); |
51
|
|
|
|
|
|
|
print "Synset is @synset\n"; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my $can = $sk->get_canonical_sense("escape", "run%2:38:04::"); |
54
|
|
|
|
|
|
|
print "Found sense $can\n"; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 DESCRIPTION |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
The WordNet::Similarity package is designed to work with words in the |
59
|
|
|
|
|
|
|
form of lemma#pos#num where "lemma" is the word lemma, "pos" is the |
60
|
|
|
|
|
|
|
part of speech, and "num" is the sense number. Unfortunately, the |
61
|
|
|
|
|
|
|
sense numbering is not stable from one WordNet release to another. |
62
|
|
|
|
|
|
|
Thus, for external programs, it can often be more useful to work with |
63
|
|
|
|
|
|
|
sense keys. Unfortunately, the WordNet::Similarity package is unaware |
64
|
|
|
|
|
|
|
of sense keys. This class fills that gap. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
WordNet sense keys are described in greater detail in |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
http://wordnet.princeton.edu/man/senseidx.5WN.html |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
There are four routines implemented here: |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
get_sense_key($sense); |
73
|
|
|
|
|
|
|
get_sense_num($sense_key); |
74
|
|
|
|
|
|
|
get_synset($sense_key); |
75
|
|
|
|
|
|
|
get_canonical_sense($lemma, $sense_key); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 get_sense_key |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Given a word sense, in the form of lemma#pos#num, this method returns |
80
|
|
|
|
|
|
|
the corresponding sense key, as defined by WordNet. Here, "lemma" is the |
81
|
|
|
|
|
|
|
word lemma, "pos" is the part of speech, and "num" is the sense number. |
82
|
|
|
|
|
|
|
The format of WordNet sense keys is documented in senseidx(5WN), one of |
83
|
|
|
|
|
|
|
the WordNet man pages. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Returns an undefined value if the sense key cannot be found. |
86
|
|
|
|
|
|
|
The 'get_sense_num' method performs the inverse operation. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head2 get_sense_num |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Given a WordNet sense key, this method returns the corresponding |
91
|
|
|
|
|
|
|
word-sense string, in the lemma#pos#num format. This function is the |
92
|
|
|
|
|
|
|
inverse of the get_sense_key method; calling one, and then the other, |
93
|
|
|
|
|
|
|
should always return exactly the original input. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Returns an undefined value if the sense cannot be found. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 get_synset |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Given a WordNet sense key, this method returns a list of other sense |
100
|
|
|
|
|
|
|
keys that belong to the same synset. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head2 get_canonical_sense |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Senses in a synset all have different lemmas. This function selects |
105
|
|
|
|
|
|
|
one particular element of a synset, given a lemma, and any other member |
106
|
|
|
|
|
|
|
of the synset. Thus, for example, run%2:38:04:: and escape%2:38:02:: |
107
|
|
|
|
|
|
|
belong to the same synset. Then |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
get_canonical_sense("escape", "run%2:38:04::"); |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
will return escape%2:38:02::, as this is the sense of "escape" that |
112
|
|
|
|
|
|
|
belongs to the same synset as run%2:38:04::. Returns an undefined |
113
|
|
|
|
|
|
|
value if the sense cannot be found. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 SEE ALSO |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
senseidx(5WN), WordNet::Similarity(3), WordNet::QueryData(3) |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
http://wordnet.princeton.edu/ |
120
|
|
|
|
|
|
|
http://www.ai.mit.edu/~jrennie/WordNet |
121
|
|
|
|
|
|
|
http://groups.yahoo.com/group/wn-similarity |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head1 AUTHOR |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Linas Vepstas |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Copyright (c) 2008, 2009 Linas Vepstas |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or |
132
|
|
|
|
|
|
|
modify it under the terms of the GNU General Public License |
133
|
|
|
|
|
|
|
as published by the Free Software Foundation; either version 2 |
134
|
|
|
|
|
|
|
of the License, or (at your option) any later version. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
137
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
138
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
139
|
|
|
|
|
|
|
GNU General Public License for more details. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
142
|
|
|
|
|
|
|
along with this program; if not, write to |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
The Free Software Foundation, Inc., |
145
|
|
|
|
|
|
|
59 Temple Place - Suite 330, |
146
|
|
|
|
|
|
|
Boston, MA 02111-1307, USA. |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Note: a copy of the GNU General Public License is available on the web |
149
|
|
|
|
|
|
|
at http://www.gnu.org/licenses/gpl.txt and is included in this |
150
|
|
|
|
|
|
|
distribution as GPL.txt. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
1
|
|
|
1
|
|
28107
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
37
|
|
155
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
50
|
|
156
|
|
|
|
|
|
|
require Exporter; |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# Package globals consumed by Exporter and by version checks.  Declared
# with "our" rather than the obsolete "use vars" pragma; they remain
# ordinary package variables visible for the rest of the file.
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);

BEGIN {
    # List of classes from which we are inheriting methods
    @ISA = qw(Exporter);
    # Function names exported by default (none)
    @EXPORT = qw();
    # Function names exported on request (none)
    @EXPORT_OK = qw();
    $VERSION = '1.03';
}

END { } # module clean-up code here (global destructor)
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# ------------------------------------------------------ |
172
|
|
|
|
|
|
|
# Constructor |
173
|
|
|
|
|
|
|
# Looks in a default path for the sense index file. |
174
|
|
|
|
|
|
|
# Reads it, builds an associative array of file offsets to sense keys. |
175
|
|
|
|
|
|
|
# new($class, $wn)
#
# Builds a WordNet::SenseKey object around $wn, an object that provides
# dataPath() (and, for the other methods, offset() and querySense()), as
# WordNet::QueryData does.
#
# The sense index is read from "<dataPath>/index.sense"; if dataPath()
# returns undef the default /usr/share/wordnet location is used.  Two
# lookup tables are built from it:
#
#   reversed_index : hash ref, synset offset => array ref of sense keys
#   forward_index  : hash ref, sense key     => sense number
#
# Dies with "Unable to open <file>: <reason>" if the index is unreadable.
sub new
{
    my ($class, $wn) = @_;
    my $self = {
        senseidx_path  => "/usr/share/wordnet",
        senseidx_file  => "/usr/share/wordnet/index.sense",
        wn             => $wn,
        reversed_index => undef,
        forward_index  => undef,
    };
    bless $self, $class;

    # Get a valid data path from WordNet::QueryData.
    my $path = $wn->dataPath();
    if (defined($path))
    {
        $self->{senseidx_path} = $path;
        $self->{senseidx_file} = $path . "/index.sense";
    }

    # Open the file for reading.  A three-argument open on a lexical
    # handle does not rely on some other module having loaded FileHandle.
    open(my $fh, '<', $self->{senseidx_file})
        or die "Unable to open $self->{senseidx_file}: $!";

    # Each index line is: sense_key  synset_offset  sense_number  tag_cnt
    my %rev_idx = ();
    my %fwd_idx = ();
    while (my $line = <$fh>)    # lexical $line: leave the caller's $_ alone
    {
        my ($skey, $offset, $snum) = split ' ', $line;

        # Skip blank or truncated lines instead of indexing undef fields.
        next unless defined($snum);

        # Several sense keys share one offset; collect them in place.
        push @{ $rev_idx{$offset} }, $skey;
        $fwd_idx{$skey} = $snum;
    }
    close($fh);

    # Store hash references to the two tables.
    $self->{reversed_index} = \%rev_idx;
    $self->{forward_index}  = \%fwd_idx;

    return $self;
}
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# report WordNet data dir |
225
|
0
|
|
|
0
|
0
|
|
# Accessor: returns the WordNet data directory stored in the object's
# senseidx_path field.
sub dataPath
{
    my ($self) = @_;
    return $self->{senseidx_path};
}
|
0
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# ------------------------------------------------------ |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# get_sense_key($lempos)
#
# Converts a word sense written as lemma#pos#num into the sense key
# listed for it in the sense index.
#
# Returns:
#   undef  if $lempos is undefined or the query object knows no offset
#          for it;
#   ""     if an offset was found but none of the sense keys indexed
#          under that offset belongs to the lemma (historical behaviour,
#          kept so existing callers see the same value);
#   otherwise the matching sense key.
sub get_sense_key
{
    my ($self, $lempos) = @_;

    # Validate the argument before handing it to the query object.
    # (Explicit "return undef" keeps the one-value return in list context.)
    return undef unless defined($lempos);

    my $offset = $self->{wn}->offset($lempos);
    return undef unless defined($offset);

    # Split lemma#pos#num.  Sense-key lemmas are lower case and use
    # underscores in place of spaces.
    my ($lemma, $pos) = split /#/, $lempos;
    $lemma = '' unless defined($lemma);
    $lemma =~ tr/A-Z /a-z_/;

    # ss_type digit(s) a sense key may carry for each part of speech.
    # An 'a' query must also accept satellite adjectives (ss_type 5),
    # e.g. sane#a#2 maps to sane%5:00:00:rational:00.
    my %ss_type_for = (n => '1', v => '2', a => '[35]', r => '4', s => '5');
    my $type_re = '\d';
    if (defined($pos) && exists $ss_type_for{lc $pos})
    {
        $type_re = $ss_type_for{lc $pos};
    }

    # The key must START with "lemma%type:".  Matching the lemma as a
    # loose pattern would also hit longer lemmas ("outrun"), the head
    # word field of satellite keys, and would treat "." as a wildcard.
    my $wanted = qr/\A\Q$lemma\E%${type_re}:/;

    # Pad the offset with zeroes, if it is too short to be a valid offset.
    if (length($offset) < 8)
    {
        $offset = ('0' x (8 - length($offset))) . $offset;
    }

    # All sense keys recorded for this synset offset (array reference).
    my $candidates = $self->{reversed_index}{$offset} || [];

    foreach my $sensekey (@$candidates)
    {
        return $sensekey if $sensekey =~ $wanted;
    }

    return "";
}
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# ------------------------------------------------------ |
287
|
|
|
|
|
|
|
# get_sense_num($sense_key)
#
# Converts a sense key (lemma%ss_type:...) into the word-sense string
# lemma#pos#num, taking the sense number from the forward index built
# by the constructor.
#
# Returns undef if the key is undefined, is not in the index, or does
# not have the lemma%digits: shape.
sub get_sense_num
{
    my ($self, $sense_key) = @_;
    return undef unless defined($sense_key);

    my $sense_num = $self->{forward_index}{$sense_key};
    return undef unless defined($sense_num);

    # The lemma is everything before the '%' (it may contain '-', "'",
    # '.', digits ...); the ss_type is the number that follows.  Use the
    # captures only when the match succeeded.
    my ($lemma, $ss_type) = ($sense_key =~ m/\A([^%]+)%(\d+):/);
    return undef unless defined($ss_type);

    # ss_type 5 is an adjective satellite; it is addressed with the
    # part of speech 'a' in the lemma#pos#num notation, which keeps
    # get_sense_key and get_sense_num inverse to each other.
    my %pos_for = (1 => 'n', 2 => 'v', 3 => 'a', 4 => 'r', 5 => 'a');
    my $pos = exists $pos_for{$ss_type} ? $pos_for{$ss_type} : $ss_type;

    return $lemma . "#" . $pos . "#" . $sense_num;
}
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# ------------------------------------------------------ |
311
|
|
|
|
|
|
|
# get_synset -- return a wordnet synset. |
312
|
|
|
|
|
|
|
# Given a sense key as input, this will |
313
|
|
|
|
|
|
|
# return a list of sense keys in the synset. |
314
|
|
|
|
|
|
|
# get_synset($sense_key)
#
# Returns the list of sense keys for the synset that $sense_key belongs
# to: the key is converted to lemma#pos#num with get_sense_num, the
# query object is asked for the "syns" relation, and every member is
# converted back with get_sense_key.
#
# Returns an empty list if $sense_key is not known.  Members for which
# no sense key can be found are left out, so the result never contains
# undefined or empty placeholder entries.
sub get_synset
{
    my ($self, $sense_key) = @_;

    my $sense_str = $self->get_sense_num($sense_key);
    return () unless defined($sense_str);

    my @keyset = ();
    foreach my $lempos ($self->{wn}->querySense($sense_str, "syns"))
    {
        my $skey = $self->get_sense_key($lempos);

        # get_sense_key yields undef or "" when it has no answer.
        next unless defined($skey) && length($skey);

        push @keyset, $skey;
    }

    return @keyset;
}
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# ------------------------------------------------------ |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
# get_canonical_sense -- get matching lemma from a synset. |
337
|
|
|
|
|
|
|
# Return an alternate sense key that belongs to the same |
338
|
|
|
|
|
|
|
# synset as the input sense key, but has the lemmatized |
339
|
|
|
|
|
|
|
# form $lemma at its root. |
340
|
|
|
|
|
|
|
# |
341
|
|
|
|
|
|
|
# Thus, for example: |
342
|
|
|
|
|
|
|
# |
343
|
|
|
|
|
|
|
# get_canonical_sense("join#v", "connect%2:42:02::"); |
344
|
|
|
|
|
|
|
# |
345
|
|
|
|
|
|
|
# will return "join%2:42:01", because "join%2:42:01" is in the same |
346
|
|
|
|
|
|
|
# synset as "connect%2:42:02::", but has "join" as its root. |
347
|
|
|
|
|
|
|
# |
348
|
|
|
|
|
|
|
# get_canonical_sense($lemma, $sense)
#
# Looks through the synset of the sense key $sense (as returned by
# get_synset) for the member whose lemma is $lemma, and returns that
# member's sense key.  $lemma may carry a trailing "#pos..." marker,
# which is ignored.  Returns undef if $lemma is undefined or no member
# of the synset has that lemma.
sub get_canonical_sense
{
    my ($self, $lemma, $sense) = @_;
    return undef unless defined($lemma);

    # Strip off the part-of-speech marker, if any.  A substitution never
    # leaves a stale capture behind and keeps lemmas containing '-' or
    # "'" intact.
    $lemma =~ s/#.*//s;

    # Sense-key lemmas are lower case with '_' in place of spaces.
    $lemma =~ tr/A-Z /a-z_/;

    # Loop over the synset, looking for a matching form.
    foreach my $altsense ($self->get_synset($sense))
    {
        next unless defined($altsense);

        # The lemma of a sense key is the text before the first '%'.
        my $percent_at = index($altsense, '%');
        next if $percent_at < 0;

        if (substr($altsense, 0, $percent_at) eq $lemma)
        {
            return $altsense;
        }
    }

    # Nothing matched: explicit undef, a single value even in list context.
    return undef;
}
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# module must return true |
378
|
|
|
|
|
|
|
1; |
379
|
|
|
|
|
|
|
__END__ |