| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# -*- perl -*- |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# WordNet::SenseKey.pm version 1.03 |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# Given a WordNet file offset, return the corresponding sense key |
|
6
|
|
|
|
|
|
|
# Meant to be used with WordNet::Similarity, which does not normally |
|
7
|
|
|
|
|
|
|
# manipulate data using sense keys. |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# Copyright (c) 2008 Linas Vepstas linasvepstas at gmail.com |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
|
12
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
|
13
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
|
14
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
|
17
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
18
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
19
|
|
|
|
|
|
|
# GNU General Public License for more details. |
|
20
|
|
|
|
|
|
|
# |
|
21
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
|
22
|
|
|
|
|
|
|
# along with this program; if not, write to |
|
23
|
|
|
|
|
|
|
# |
|
24
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
|
25
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
|
26
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
|
27
|
|
|
|
|
|
|
# |
|
28
|
|
|
|
|
|
|
# ------------------------------------------------------------------ |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
package WordNet::SenseKey; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 NAME |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
WordNet::SenseKey - convert WordNet sense keys to sense numbers, and v.v. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use WordNet::QueryData; |
|
39
|
|
|
|
|
|
|
use WordNet::SenseKey; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $wn = WordNet::QueryData->new("/usr/share/wordnet"); |
|
42
|
|
|
|
|
|
|
my $sk = WordNet::SenseKey->new($wn); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $skey = $sk->get_sense_key("run#v#2"); |
|
45
|
|
|
|
|
|
|
print "Found the sense key $skey for run#v#2\n"; |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $sense = $sk->get_sense_num($skey); |
|
48
|
|
|
|
|
|
|
print "Found sense $sense for key $skey\n"; |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
my @synset = $sk->get_synset($skey); |
|
51
|
|
|
|
|
|
|
print "Synset is @synset\n"; |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my $can = $sk->get_canonical_sense("escape", "run%2:38:04::"); |
|
54
|
|
|
|
|
|
|
print "Found sense $can\n"; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
The WordNet::Similarity package is designed to work with words in the |
|
59
|
|
|
|
|
|
|
form of lemma#pos#num where "lemma" is the word lemma, "pos" is the |
|
60
|
|
|
|
|
|
|
part of speech, and "num" is the sense number. Unfortunately, the |
|
61
|
|
|
|
|
|
|
sense numbering is not stable from one WordNet release to another. |
|
62
|
|
|
|
|
|
|
Thus, for external programs, it can often be more useful to work with |
|
63
|
|
|
|
|
|
|
sense keys. Unfortunately, the WordNet::Similarity package is unaware |
|
64
|
|
|
|
|
|
|
of sense keys. This class fills that gap. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
WordNet sense keys are described in greater detail in |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
http://wordnet.princeton.edu/man/senseidx.5WN.html |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
There are four routines implemented here: |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
get_sense_key($sense); |
|
73
|
|
|
|
|
|
|
get_sense_num($sense_key); |
|
74
|
|
|
|
|
|
|
get_synset($sense_key); |
|
75
|
|
|
|
|
|
|
get_canonical_sense($lemma, $sense_key); |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 get_sense_key |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Given a word sense, in the form of lemma#pos#num, this method returns |
|
80
|
|
|
|
|
|
|
the corresponding sense key, as defined by WordNet. Here, "lemma" is the |
|
81
|
|
|
|
|
|
|
word lemma, "pos" is the part of speech, and "num" is the sense number. |
|
82
|
|
|
|
|
|
|
The format of WordNet sense keys is documented in senseidx(5WN), one of |
|
83
|
|
|
|
|
|
|
the WordNet man pages. |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Returns an undefined value if the sense key cannot be found. |
|
86
|
|
|
|
|
|
|
The 'get_sense_num' method performs the inverse operation. |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head2 get_sense_num |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Given a WordNet sense key, this method returns the corresponding |
|
91
|
|
|
|
|
|
|
word-sense string, in the lemma#pos#num format. This function is the |
|
92
|
|
|
|
|
|
|
inverse of the get_sense_key method; calling one, and then the other, |
|
93
|
|
|
|
|
|
|
should always return exactly the original input. |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Returns an undefined value if the sense cannot be found. |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 get_synset |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Given a WordNet sense key, this method returns a list of other sense |
|
100
|
|
|
|
|
|
|
keys that belong to the same synset. |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head2 get_canonical_sense |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Senses in a synset all have different lemmas. This function selects |
|
105
|
|
|
|
|
|
|
one particular element of a synset, given a lemma, and any other member |
|
106
|
|
|
|
|
|
|
of the synset. Thus, for example, run%2:38:04:: and escape%2:38:02:: |
|
107
|
|
|
|
|
|
|
belong to the same synset. Then |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
get_canonical_sense("escape", "run%2:38:04::"); |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
will return escape%2:38:02::, as this is the sense of "escape" that |
|
112
|
|
|
|
|
|
|
belongs to the same synset as run%2:38:04::. Returns an undefined |
|
113
|
|
|
|
|
|
|
value if the sense cannot be found. |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
senseidx(5WN), WordNet::Similarity(3), WordNet::QueryData(3) |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
http://wordnet.princeton.edu/ |
|
120
|
|
|
|
|
|
|
http://www.ai.mit.edu/~jrennie/WordNet |
|
121
|
|
|
|
|
|
|
http://groups.yahoo.com/group/wn-similarity |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=head1 AUTHOR |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Linas Vepstas |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Copyright (c) 2008, 2009 Linas Vepstas |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or |
|
132
|
|
|
|
|
|
|
modify it under the terms of the GNU General Public License |
|
133
|
|
|
|
|
|
|
as published by the Free Software Foundation; either version 2 |
|
134
|
|
|
|
|
|
|
of the License, or (at your option) any later version. |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
|
137
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
138
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
139
|
|
|
|
|
|
|
GNU General Public License for more details. |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
|
142
|
|
|
|
|
|
|
along with this program; if not, write to |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
The Free Software Foundation, Inc., |
|
145
|
|
|
|
|
|
|
59 Temple Place - Suite 330, |
|
146
|
|
|
|
|
|
|
Boston, MA 02111-1307, USA. |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Note: a copy of the GNU General Public License is available on the web |
|
149
|
|
|
|
|
|
|
at L<http://www.gnu.org/licenses/gpl.txt> and is included in this |
|
150
|
|
|
|
|
|
|
distribution as GPL.txt. |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
|
153
|
|
|
|
|
|
|
|
|
154
|
1
|
|
|
1
|
|
28107
|
use strict; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
37
|
|
|
155
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
50
|
|
|
156
|
|
|
|
|
|
|
require Exporter; |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# Package globals read by Exporter (@ISA, @EXPORT, @EXPORT_OK) and by
# version checks ($VERSION).  Declared with file-scoped "our" rather than
# the obsolete "use vars" pragma; the variables are the same package
# variables either way.
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);

BEGIN {
    # List of classes from which we are inheriting methods
    @ISA = qw(Exporter);
    # Automatically loads these function names to be used without qualification
    @EXPORT = qw();
    # Allows these functions to be used without qualification
    @EXPORT_OK = qw();
    $VERSION = '1.03';
}
|
168
|
|
|
|
|
|
|
|
|
169
|
1
|
|
|
1
|
|
8352
|
END { } # module clean-up code here (global destructor) |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# ------------------------------------------------------ |
|
172
|
|
|
|
|
|
|
# Constructor |
|
173
|
|
|
|
|
|
|
# Looks in a default path for the sense index file. |
|
174
|
|
|
|
|
|
|
# Reads it, builds an associative array of file offsets to sense keys. |
|
175
|
|
|
|
|
|
|
# new($class, $wn)
#
#   $wn - object providing a dataPath() method (the file documents this as
#         a WordNet::QueryData instance); it is stored in the new object
#         and used later for offset and synset queries.
#
# Reads "<data path>/index.sense" (falling back to /usr/share/wordnet when
# dataPath() returns undef) and builds two lookup tables:
#
#   reversed_index : synset offset => reference to array of sense keys
#   forward_index  : sense key     => sense number
#
# Returns the blessed object.  Dies if the index file cannot be opened.
sub new
{
    my ($class, $wn) = @_;

    my $self = {
        senseidx_path  => "/usr/share/wordnet",
        senseidx_file  => "/usr/share/wordnet/index.sense",
        wn             => $wn,
        reversed_index => undef,
        forward_index  => undef
    };
    bless $self, $class;

    # Get a valid data path from WordNet::QueryData.
    my $path = $wn->dataPath();
    if (defined($path)) {
        $self->{senseidx_path} = $path;
        $self->{senseidx_file} = $path . "/index.sense";
    }

    # Open the file for reading.  The three-argument open needs no extra
    # module and cannot misread a mode character out of the file name.
    open(my $fh, '<', $self->{senseidx_file})
        or die "Unable to open $self->{senseidx_file}: $!";

    # Build the offset => sense-keys and sense-key => sense-number tables.
    my %rev_idx = ();
    my %fwd_idx = ();
    while (my $line = <$fh>) {
        # Fields: sense_key  synset_offset  sense_number  tag_cnt
        my ($skey, $offset, $snum) = split ' ', $line;

        # Skip blank or truncated lines rather than indexing undef values.
        next unless defined($snum);

        # Append in place; the array is autovivified on first use.
        push @{ $rev_idx{$offset} }, $skey;
        $fwd_idx{$skey} = $snum;
    }
    close($fh);

    # Store hash references to both tables in the object.
    $self->{reversed_index} = \%rev_idx;
    $self->{forward_index}  = \%fwd_idx;

    return $self;
}
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# report WordNet data dir |
|
225
|
0
|
|
|
0
|
0
|
|
# Accessor: hands back the directory stored in the object's
# senseidx_path slot.
sub dataPath
{
    my ($this) = @_;
    return $this->{senseidx_path};
}
|
|
0
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# ------------------------------------------------------ |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# get_sense_key($lempos)
#
#   $lempos - word sense in lemma#pos#num form, e.g. "run#v#2".
#
# Asks the stored query object for the synset offset of $lempos, then
# picks, among the sense keys indexed under that offset, the one whose
# lemma is exactly the lemma of $lempos.
#
# Returns the sense key, or undef when $lempos is undefined, the offset
# is unknown, or no key with that lemma is indexed under the offset.
# The result is always a single scalar, even in list context.
sub get_sense_key
{
    my ($self, $lempos) = @_;

    # Check the argument before handing it to the query object.
    return undef unless defined($lempos);

    my $offset = $self->{wn}->offset($lempos);
    return undef unless defined($offset);

    # Keep only the lemma, lower-cased as in the sense index.  The
    # part-of-speech is deliberately not matched: an adjective such as
    # sane#a#2 maps to a satellite key, sane%5:00:00:rational:00.
    my $lemma = $lempos;
    $lemma =~ s/#.*//s;
    $lemma =~ tr/A-Z/a-z/;

    # Pad the offset with zeroes, if it is too short to be a valid offset.
    my $pad = 8 - length($offset);
    $offset = ("0" x $pad) . $offset if $pad > 0;

    my $keys = $self->{reversed_index}->{$offset};
    return undef unless defined($keys);

    # Compare the "lemma%" prefix literally.  A regex match on the bare
    # lemma would let "phone" select "earphone%..." from the same synset
    # and would treat characters such as "." as metacharacters.
    my $prefix = $lemma . "%";
    foreach my $sensekey (@$keys) {
        return $sensekey if index($sensekey, $prefix) == 0;
    }

    return undef;
}
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# ------------------------------------------------------ |
|
287
|
|
|
|
|
|
|
# get_sense_num($sense_key)
#
#   $sense_key - WordNet sense key, e.g. "run%2:38:04::".
#
# Looks the key up in the forward index and rebuilds the lemma#pos#num
# string from the key's lemma, its ss_type digit and the indexed sense
# number.
#
# Returns the word-sense string, or undef when the key is undefined, is
# not in the index, or does not have the lemma%digits shape.
# The result is always a single scalar, even in list context.
sub get_sense_num
{
    my ($self, $sense_key) = @_;

    return undef unless defined($sense_key);

    my $sense_num = $self->{forward_index}->{$sense_key};
    return undef unless defined($sense_num);

    # Everything before the first "%" is the lemma; this keeps hyphens,
    # apostrophes and the like, which a \w-based class would cut off.
    # Captures are taken from the match's return list, so a failed match
    # can never leave stale values behind.
    my ($lemma, $ss_type) = $sense_key =~ m/\A([^%]+)%(\d+)/;
    return undef unless defined($ss_type);

    # ss_type digit => part-of-speech letter.  Type 5 (adjective
    # satellite) is reported as "a", matching the sane#a#2 <->
    # sane%5:00:00:rational:00 pairing noted in get_sense_key.
    my %pos_of = (1 => "n", 2 => "v", 3 => "a", 4 => "r", 5 => "a");
    my $pos = exists($pos_of{$ss_type}) ? $pos_of{$ss_type} : $ss_type;

    return $lemma . "#" . $pos . "#" . $sense_num;
}
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# ------------------------------------------------------ |
|
311
|
|
|
|
|
|
|
# get_synset -- return a wordnet synset. |
|
312
|
|
|
|
|
|
|
# Given a sense key as input, this will |
|
313
|
|
|
|
|
|
|
# return a list of sense keys in the synset. |
|
314
|
|
|
|
|
|
|
# get_synset($sense_key)
#
#   $sense_key - WordNet sense key, e.g. "run%2:38:04::".
#
# Converts the key to lemma#pos#num form, asks the stored query object
# for the "syns" relation of that sense, and converts every member back
# to a sense key.
#
# Returns the list of sense keys; the empty list when $sense_key is not
# in the index.  Members for which no sense key can be found are left
# out, so every element of the result is a usable key.
sub get_synset
{
    my ($self, $sense_key) = @_;

    my $sense_str = $self->get_sense_num($sense_key);
    return () unless defined($sense_str);

    my @keyset = ();
    foreach my $lempos ($self->{wn}->querySense($sense_str, "syns")) {
        my $skey = $self->get_sense_key($lempos);

        # Accept neither undef nor the empty string as a sense key.
        next unless defined($skey) && length($skey);
        push @keyset, $skey;
    }

    return @keyset;
}
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# ------------------------------------------------------ |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
# get_canonical_sense -- get matching lemma from a synset. |
|
337
|
|
|
|
|
|
|
# Return an alternate sense key that belongs to the same |
|
338
|
|
|
|
|
|
|
# synset as the input sense key, but has the lemmatized |
|
339
|
|
|
|
|
|
|
# form $lemma at its root. |
|
340
|
|
|
|
|
|
|
# |
|
341
|
|
|
|
|
|
|
# Thus, for example: |
|
342
|
|
|
|
|
|
|
# |
|
343
|
|
|
|
|
|
|
# get_canonical_sense("join#v", "connect%2:42:02::"); |
|
344
|
|
|
|
|
|
|
# |
|
345
|
|
|
|
|
|
|
# will return "join%2:42:01", because "join%2:42:01" is in the same |
|
346
|
|
|
|
|
|
|
# synset as "connect%2:42:02::", but has "join" as its root. |
|
347
|
|
|
|
|
|
|
# |
|
348
|
|
|
|
|
|
|
# get_canonical_sense($lemma, $sense)
#
#   $lemma - lemma to look for; an optional "#pos..." suffix is ignored.
#   $sense - sense key of any member of the synset to search.
#
# Returns the sense key from the synset of $sense whose lemma is exactly
# $lemma, or undef when there is none (or $lemma is undefined).
# The result is always a single scalar, even in list context.
sub get_canonical_sense
{
    my ($self, $lemma, $sense) = @_;

    return undef unless defined($lemma);

    # Strip off the part-of-speech marker from the lemma.  A substitution
    # is used so that nothing depends on capture variables, which would
    # keep a value from an earlier match when there is no "#" to find.
    $lemma =~ s/#.*//s;
    my $prefix = $lemma . "%";

    # Loop over the synset, looking for a key that starts with "lemma%".
    # The literal prefix test handles hyphenated lemmas and cannot match
    # a lemma that merely ends with the requested one.
    foreach my $altsense ($self->get_synset($sense)) {
        next unless defined($altsense);
        return $altsense if index($altsense, $prefix) == 0;
    }

    return undef;
}
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# module must return true |
|
378
|
|
|
|
|
|
|
1; |
|
379
|
|
|
|
|
|
|
__END__ |