File Coverage

blib/lib/WordNet/SenseRelate/WordToSet.pm

Criterion	Covered	Total	%
statement	21	180	11.6
branch	0	76	0.0
condition	0	27	0.0
subroutine	7	13	53.8
pod	3	4	75.0
total	31	300	10.3

line	stmt	bran	cond	sub	pod	time	code
1							package WordNet::SenseRelate::WordToSet;
2
3							=head1 NAME
4
5							WordNet::SenseRelate::WordToSet - Find the WordNet Sense of a Target
6							Word that is Most Related to a Given Set of Words
7
8							=head1 SYNOPSIS
9
10							use WordNet::SenseRelate::WordToSet;
11							use WordNet::QueryData;
12							my $qd = WordNet::QueryData->new;
13
14							my %options = (measure => 'WordNet::Similarity::jcn',
15							wordnet => $qd);
16
17							my $mod = WordNet::SenseRelate::WordToSet->new (%options);
18
19							my $res = $mod->disambiguate (target => 'bank',
20							context => [qw/money cash dollar/]);
21
22							# all senses for target and their scores are returned
23							# we will just print the sense most related to the set
24
25							$best_score = -100;
26							foreach my $key (keys %$res) {
27							next unless defined $res->{$key};
28							if ($res->{$key} > $best_score) {
29							$best_score = $res->{$key};
30							$best = $key;
31							}
32							}
33
34							# let's call WordNet::QueryData to get the gloss of the most
35							# related sense of the target to the set
36
37							print "$best : ", join(", ", $qd->querySense($best, "glos")), "\n";
38
39							my $res = $mod->disambiguate (target => 'bank',
40							context => [qw/river shore slope water/]);
41
42							# all senses for target and their scores are returned
43							# we will just print the sense most related to the set
44
45							$best_score = -100;
46							foreach my $key (keys %$res) {
47							next unless defined $res->{$key};
48							if ($res->{$key} > $best_score) {
49							$best_score = $res->{$key};
50							$best = $key;
51							}
52							}
53
54							# let's call WordNet::QueryData to get the gloss of the most
55							# related sense of the target to the set
56
57							print "$best : ", join(", ", $qd->querySense($best, "glos")), "\n";
58
59							=head1 DESCRIPTION
60
61							WordNet::SenseRelate::WordToSet finds the sense of a given target word
62							that is most related to the words in a given set.
63
64							=head2 Methods
65
66							The methods below will die() on serious errors. Wrap calls to these
67							methods in an eval BLOCK to catch the exceptions. See
68							'perldoc -f eval' for more information.
69
70							=over
71
72							=cut
73
74	1			1		1112	use 5.006;
	1					4
	1					49
75	1			1		7	use strict;
	1					2
	1					41
76	1			1		18	use warnings;
	1					2
	1					37
77	1			1		6	use Carp;
	1					2
	1					178
78
79							our @ISA = ();
80							our $VERSION = '0.04';
81
82							my %wordnet;
83							my %simMeasure;
84							my %trace;
85							my %wnformat;
86							my %threshold;
87
88							# constants used to specify trace levels
89							#use constant TR_CONTEXT => 1; # show the context window
90							#use constant TR_BESTSCORE => 2; # show the best score
91							#use constant TR_ALLSCORES => 4; # show all non-zero scores
92
93							# the previous three levels don't make a lot of sense for WordToSet
94							# * The context should be obvious
95							# * All the scores are returned from disambiguate()
96	1			1		7	use constant TR_PAIRWISE => 1; # show all the non-zero similarity scores
	1					2
	1					97
97	1			1		6	use constant TR_ZERO => 2;
	1					1
	1					62
98	1			1		4	use constant TR_MEASURE => 4; # show similarity measure traces
	1					2
	1					2146
99
100							=item BZ<>
101
102							Z<>The constructor for this class.
103
104							Parameters:
105
106							wordnet => REFERENCE : WordNet::QueryData object (required)
107							measure => STRING : name of a WordNet::Similarity measure (required)
108							config => FILENAME : path to a config file for above measure
109							trace => INTEGER : generate traces (default : 0)
110							threshold => NUMBER : similarity scores less than this are ignored
111
112							Returns:
113
114							A reference to the constructed object or undef on error.
115
116							The trace levels are:
117
118							1 show non-zero scores from the semantic relatedness measure
119
120							2 show zero & undefined scores from the relatedness measure
121							(no effect unless combined with level 1)
122
123							4 show traces from the semantic relatedness measure
124
125							Note: the trace levels can be added together to achieve a combined effect.
126							For example, to show the non-zero scores, the zero scores, and the
127							traces from the measure, use level 7.
128
129							=cut
130
131							sub new
132							{
133	0			0	1		my $class = shift;
134	0						my %args = @_;
135	0		0				$class = ref $class \|\| $class;
136
137	0						my $qd;
138							my $measure;
139	0						my $measure_config;
140	0						my $threshold = 0;
141	0						my $trace;
142	0						my $wnformat = 0;
143
144	0						while (my ($key, $val) = each %args) {
145	0	0					if ($key eq 'wordnet') {
		0
		0
		0
		0
		0
146	0						$qd = $val;
147							}
148							elsif ($key eq 'measure') {
149	0						$measure = $val;
150							}
151							elsif ($key eq 'config') {
152	0						$measure_config = $val;
153							}
154							elsif ($key eq 'threshold') {
155	0						$threshold = $val;
156							}
157							elsif ($key eq 'trace') {
158	0						$trace = $val;
159							}
160							elsif ($key eq 'wnformat') {
161	0						$wnformat = $val;
162							}
163							else {
164	0						croak "Unknown parameter type '$key'";
165							}
166							}
167
168	0	0					unless (ref $qd) {
169	0						croak "No WordNet::QueryData object supplied";
170							}
171
172	0	0					unless ($measure) {
173	0						croak "No relatedness measure supplied";
174							}
175
176	0						my $self = bless [], $class;
177
178							# initialize tracing
179	0	0					if (defined $trace) {
180	0						$trace{$self} = {level => $trace, string => ''};
181	0	0	0				if (($trace & TR_ZERO) and !($trace & TR_PAIRWISE)) {
182	0						warn ("Warning: trace level ", TR_ZERO,
183							" has no effect unless combined with level ", TR_PAIRWISE,
184							".\n");
185							}
186							}
187							else {
188	0						$trace{$self} = {level => 0, string => ''};
189							}
190
191							# setup relatedness measure
192	0						my $file = $measure;
193	0						$file =~ s/::/\//g;
194	0						require "${file}.pm";
195
196	0	0					if (defined $measure_config) {
197	0						$simMeasure{$self} = $measure->new ($qd, $measure_config);
198							}
199							else {
200	0						$simMeasure{$self} = $measure->new ($qd);
201							}
202
203							# check for errors
204	0						my ($errCode, $errStr) = $simMeasure{$self}->getError;
205	0	0					if ($errCode) {
206	0						croak $errStr;
207							}
208
209							# turn on traces in the relatedness measure if required
210	0	0					if ($trace{$self}->{level} & TR_MEASURE) {
211	0						$simMeasure{$self}->{trace} = 1;
212							}
213							else {
214	0						$simMeasure{$self}->{trace} = 0;
215							}
216
217	0						$wordnet{$self} = $qd;
218
219							# store threshold value
220	0						$threshold{$self} = $threshold;
221
222	0						$wnformat{$self} = $wnformat;
223
224	0						return $self;
225							}
226
227							sub DESTROY
228							{
229	0			0			my $self = shift;
230
231	0						delete $wordnet{$self};
232	0						delete $simMeasure{$self};
233	0						delete $threshold{$self};
234	0						delete $trace{$self};
235	0						delete $wnformat{$self};
236
237	0						1;
238							}
239
240
241							=item disambiguate
242
243							Disambiguates the target word
244
245							Parameters:
246
247							target => STRING : the target word to disambiguate (required)
248							context => REFERENCE : a reference to an array of context words
249
250							Returns:
251
252							A hash reference. The keys of the hash will be the senses of the
253							target word, and the values will be the score for each sense.
254
255							=cut
256
257							sub disambiguate
258							{
259	0			0	1		my $self = shift;
260	0						my %options = @_;
261
262							# local vars
263	0						my @context;
264							my $target;
265
266	0						while (my ($key, $val) = each %options) {
267	0	0					if ($key eq 'target') {
		0
		0
268	0						$target = $val;
269							}
270							elsif ($key eq 'context') {
271	0	0					if ('ARRAY' eq ref $val) {
272	0						@context = @$val;
273							}
274							else {
275	0						carp "Value for option 'context' is not an array reference";
276	0						return undef;
277							}
278							}
279							elsif ($key eq 'threshold') {
280	0						$threshold{$self} = $val;
281							}
282							else {
283	0						croak "Unknown option '$key'";
284							}
285							}
286
287	0						my $tagged = 0;
288
289							# quick sanity check to ensure that all words are in WordNet
290	0						my $qd = $wordnet{$self};
291	0	0					if ($wnformat{$self}) {
292	0						foreach my $word ($target, @context) {
293	0						my @t = $qd->querySense ($word);
294	0	0					unless (scalar @t) {
295	0						warn "'$word' is not found in WordNet\n";
296	0						return undef;
297							}
298							}
299							}
300							else {
301	0						foreach my $word ($target, @context) {
302	0						my @t = $qd->validForms ($target);
303	0	0					unless (scalar @t) {
304	0						warn "'$word' is not found in WordNet\n";
305	0						return undef;
306							}
307							}
308							}
309
310	0						my $result;
311	0						$result = $self->doNormal ($target, @context);
312
313	0						return $result;
314							}
315
316							sub doNormal
317							{
318	0			0	0		my $self = shift;
319	0						my $target = shift;
320	0						my @context = @_;
321	0						my $measure = $simMeasure{$self};
322	0						my $threshold = $threshold{$self};
323
324	0						my $tracelevel = $trace{$self}->{level};
325	0						my @traces;
326
327							# get senses for the target and context words
328	0						my @targetsenses = $self->_getSenses ($target);
329	0						my @contextsenses;
330	0						for my $i (0..$#context) {
331	0						$contextsenses[$i] = [$self->_getSenses ($context[$i])];
332							}
333
334
335							# now disambiguate the target
336
337	0						my @sums;
338	0						for my $targetsense (0..$#targetsenses) {
339	0						$sums[$targetsense] = 0;
340
341	0						for my $i (0..$#contextsenses) {
342	0	0					next if 0 == scalar $contextsenses[$i];
343	0						my @tempScores;
344
345	0						for my $k (0..$#{$contextsenses[$i]}) {
	0
346	0	0					unless (defined $contextsenses[$i][$k]) {
347	0						warn "\$contextsenses[$i][$k] is undef";
348							}
349
350	0						$tempScores[$k] =
351							$measure->getRelatedness ($targetsenses[$targetsense],
352							$contextsenses[$i][$k]);
353							}
354
355	0						my $max = -1;
356	0						my $maxidx = -1;
357	0						for my $n (0..$#tempScores) {
358	0	0					if ($tracelevel & TR_PAIRWISE) {
359	0	0	0				if (($tempScores[$n] && $tempScores[$n] > 0)
			0
360							\|\| ($tracelevel & TR_ZERO)) {
361	0	0					unless (defined $contextsenses[$i][$n]) {
362	0						warn "\$contextsenses[$i][$n] is undef";
363							}
364	0	0					my $s = " "
365							. $targetsenses[$targetsense] . ' '
366							. $contextsenses[$i][$n] . ' '
367							. (defined $tempScores[$n]
368							? $tempScores[$n]
369							: 'undef');
370
371	0						push @{$traces[$targetsense]}, $s;
	0
372							}
373							}
374
375	0	0					if ($tracelevel & TR_MEASURE) {
376	0	0	0				if (($tempScores[$n] && $tempScores[$n] > 0)
			0
377							\|\| ($tracelevel & TR_ZERO)) {
378	0						push @{$traces[$targetsense]}, $measure->getTraceString;
	0
379							}
380							}
381
382	0						$measure->getError; # clear errors from relatedness object
383
384	0	0	0				if (defined $tempScores[$n] && ($tempScores[$n] > $max)) {
385	0						$max = $tempScores[$n];
386	0						$maxidx = $n;
387							}
388
389							}
390
391	0	0					$sums[$targetsense] += $max if $max > $threshold;
392							}
393							}
394
395	0						my $max = -1;
396	0						my $maxidx = -1;
397	0						foreach my $p (0..$#sums) {
398	0	0					if ($sums[$p] > $max) {
399	0						$maxidx = $p;
400	0						$max = $sums[$p];
401							}
402
403							# if ($tracelevel & TR_ALLSCORES
404							# && (($sums[$p] > 0) or ($tracelevel & TR_ZERO))) {
405							# $trace{$self}->{string} .= " $targetsenses[$p]: $sums[$p]\n";
406							# }
407
408	0	0	0				if (($tracelevel & TR_MEASURE or $tracelevel & TR_PAIRWISE)
			0
409							&& defined $traces[$p]) {
410	0						for my $str (@{$traces[$p]}) {
	0
411	0						$trace{$self}->{string} .= $str . "\n";
412							}
413							}
414							}
415
416
417
418	0						my %rhash;
419	0						my $best_sense = '';
420	0						my $best_score = -1;
421	0						foreach my $i (0..$#sums) {
422	0	0					if ($sums[$i] > $best_score) {
423	0						$best_sense = $targetsenses[$i];
424	0						$best_score = $sums[$i];
425							}
426
427	0	0					$rhash{$targetsenses[$i]} = $sums[$i] if $sums[$i] > $threshold;
428							}
429
430							# if ($tracelevel & TR_BESTSCORE) {
431							# if ($best_score >= 0) {
432							# $trace{$self}->{string} .= " Winning sense: $best_sense\n";
433							# $trace{$self}->{string} .= " Winning score: $best_score\n";
434							# }
435							# else {
436							# $trace{$self}->{string} .= " Winning sense: (none)\n";
437							# $trace{$self}->{string} .= " Winning score: (none)\n";
438							# }
439							# }
440
441	0						return \%rhash;
442
443							# if ($maxidx >= 0) {
444							# return $targetsenses[$maxidx];
445							# }
446
447							# return $target;
448							}
449
450							=item B
451
452							Gets the current trace string and resets it to "".
453
454							Parameters:
455
456							None
457
458							Returns:
459
460							The current trace string (before resetting it). If the returned string
461							is not empty, it will end with a newline.
462
463							Example:
464
465							my $str = $wsd->getTrace ();
466							print $str;
467
468							=cut
469
470							sub getTrace
471							{
472	0			0	1		my $self = shift;
473
474	0	0					return '' unless $trace{$self};
475
476	0						my $s = $trace{$self}->{string};
477	0						$trace{$self}->{string} = '';
478	0						return $s;
479							}
480
481							sub _getSenses
482							{
483	0			0			my $self = shift;
484	0						my $word = shift;
485	0						my $qd = $wordnet{$self};
486	0						my @senses;
487
488							# first get all forms for each POS
489	0	0					if ($word =~ /\#o/) {
490	0						@senses = undef;
491							}
492							else {
493	0						my @forms;
494	0	0					unless ($wnformat{$self}) {
495	0						@forms = $qd->validForms ($word);
496							}
497							else {
498	0						@forms = $word;
499							}
500
501	0	0					if (scalar @forms == 0) {
502	0						@senses = ();
503							}
504							else {
505							# now get all the senses for each form
506	0						foreach my $form (@forms) {
507	0						my @temps = $qd->querySense ($form);
508	0						push @senses, @temps;
509							}
510							}
511							}
512
513	0						return @senses;
514							}
515
516
517							1;
518
519							__END__