File Coverage

blib/lib/WordNet/Similarity/vector.pm

Criterion	Covered	Total	%
statement	13	15	86.6
branch			n/a
condition			n/a
subroutine	5	5	100.0
pod			n/a
total	18	20	90.0

line	stmt	sub	time	code
1				# WordNet::Similarity::vector.pm version 2.04
2				# (Last updated $Id: vector.pm,v 1.24 2008/03/27 06:21:17 sidz1979 Exp $)
3				#
4				# Module accepts two WordNet synsets and returns a floating point
5				# number that indicates how similar those two synsets are, using a
6				# gloss vector overlap measure based on "context vectors" described by
7				# Schütze (1998).
8				#
9				# Copyright (c) 2005,
10				#
11				# Ted Pedersen, University of Minnesota Duluth
12				# tpederse at d.umn.edu
13				#
14				# Siddharth Patwardhan, University of Utah, Salt Lake City
15				# sidd at cs.utah.edu
16				#
17				# Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh
18				# banerjee+ at cs.cmu.edu
19				#
20				# This program is free software; you can redistribute it and/or
21				# modify it under the terms of the GNU General Public License
22				# as published by the Free Software Foundation; either version 2
23				# of the License, or (at your option) any later version.
24				#
25				# This program is distributed in the hope that it will be useful,
26				# but WITHOUT ANY WARRANTY; without even the implied warranty of
27				# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28				# GNU General Public License for more details.
29				#
30				# You should have received a copy of the GNU General Public License
31				# along with this program; if not, write to
32				#
33				# The Free Software Foundation, Inc.,
34				# 59 Temple Place - Suite 330,
35				# Boston, MA 02111-1307, USA.
36				#
37				# ------------------------------------------------------------------
38
39				package WordNet::Similarity::vector;
40
41				=head1 NAME
42
43				WordNet::Similarity::vector - Perl module for computing semantic relatedness
44				of word senses using second order co-occurrence vectors of glosses of the word
45				senses.
46
47				=head1 SYNOPSIS
48
49				use WordNet::Similarity::vector;
50
51				use WordNet::QueryData;
52
53				my $wn = WordNet::QueryData->new();
54
55				my $vector = WordNet::Similarity::vector->new($wn);
56
57				my $value = $vector->getRelatedness("car#n#1", "bus#n#2");
58
59				($error, $errorString) = $vector->getError();
60
61				die "$errorString\n" if($error);
62
63				print "car (sense 1) <-> bus (sense 2) = $value\n";
64
65				=head1 DESCRIPTION
66
67				Schütze (1998) creates what he calls context vectors (second order
68				co-occurrence vectors) of pieces of text for the purpose of Word Sense
69				Discrimination. This idea is adopted by Patwardhan and Pedersen to represent
70				the word senses by second-order co-occurrence vectors of their dictionary
71				(WordNet) definitions. The relatedness of two senses is then computed as
72				the cosine of their representative gloss vectors.
73
74				=over
75
76				=cut
77
78	1	1	2683	use strict;
	1		2
	1		33
79	1	1	627	use WordNet::get_wn_info;
	1		2
	1		49
80	1	1	6	use WordNet::stem;
	1		2
	1		28
81	1	1	570	use WordNet::vectorFile;
	1		3
	1		45
82	1	1	67	use WordNet::Similarity;
	0
	0
83				use File::Spec;
84				use vars qw($VERSION @ISA);
85
# This measure specialises the generic WordNet::Similarity base class.
@ISA = ('WordNet::Similarity');

$VERSION = '2.04';

# Register the options this measure accepts in its configuration.  Every row
# is handed, element for element, to WordNet::Similarity::addConfigOption();
# the meaning of the individual columns is defined by that function.
foreach my $optionSpec (
  [ "relation", 0, "p", undef ],
  [ "vectordb", 0, "p", undef ],
  [ "stop",     0, "p", undef ],
  [ "stem",     0, "i", 0 ],
  [ "textsize", 0, "i", "-1" ],
)
{
  WordNet::Similarity::addConfigOption(
    $optionSpec->[0], $optionSpec->[1], $optionSpec->[2], $optionSpec->[3]);
}
95
96				=item $vector->setPosList()
97
98				This method is internally called to determine the parts of speech
99				this measure is capable of dealing with.
100
101				Parameters: none.
102
103				Returns: none.
104
105				=cut
106
# Marks, on the object itself, each part-of-speech key this measure handles
# ("n", "v", "a" and "r") with a true flag.
# INPUT PARAMS  : (none besides the object)
# RETURN VALUES : 1
sub setPosList
{
  my ($measure) = @_;

  $measure->{$_} = 1 foreach qw(n v a r);

  return 1;
}
116
117				=item $vector->initialize($file)
118
119				Overrides the initialize method in the parent class (WordNet::Similarity). This method
120				essentially initializes the measure for use.
121
122				Parameters: $file -- configuration file.
123
124				Returns: none.
125
126				=cut
127
128				# Initialization of the WordNet::Similarity::vector object... parses the config file and sets up
129				# global variables, or sets them to default values.
130				# INPUT PARAMS : $paramFile .. File containing the module specific params.
131				# RETURN VALUES : (none)
# Initializes the measure: loads the optional stop list, creates the
# WordNet::get_wn_info helper, reads the word-vector database and sets up the
# relation function chains (with their weights) used to build gloss strings.
#
# INPUT PARAMS  : any arguments are forwarded untouched to SUPER::initialize().
# RETURN VALUES : (none).  Problems are reported through $self->{error}
#                 (set to 1 together with "Warning" messages, to 2 together
#                 with "Error" messages) and $self->{errorString}.
sub initialize
{
  my $self = shift;
  my $wn = $self->{wn};
  my %stopHash = ();

  # Defaults that are in place before the super-class initializer runs.
  # NOTE(review): presumably SUPER::initialize() overwrites these from the
  # configuration when the corresponding option is given -- confirm.
  $self->{stem} = 0;
  $self->{stopHash} = {};

  # Call the initialize method of the super-class.
  $self->SUPER::initialize(@_);

  # Initialize the gloss vector cache: vCache maps word#pos#sense to its
  # vector, vCacheQ remembers insertion order for eviction.
  $self->{vCache} = {};
  $self->{vCacheQ} = [];
  $self->{vCacheSize} = 80;

  # Load the stop list.
  if(defined $self->{stop})
  {
    my $stopFile = $self->{stop};

    if(open(my $stopHandle, '<', $stopFile))
    {
      while(my $line = <$stopHandle>)
      {
        $line =~ s/[\r\f\n]//g;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        $line =~ s/\s+/_/g;

        # A blank line is not a stop word.
        next if($line eq "");

        $stopHash{$line} = 1;
        $self->{stopHash}->{$line} = 1;
      }
      close($stopHandle);
    }
    else
    {
      $self->{errorString} .= "\nWarning (WordNet::Similarity::vector->initialize()) - ";
      $self->{errorString} .= "Unable to open $stopFile.";
      $self->{error} = 1 if($self->{error} < 1);
    }
  }

  # Initialize the get_wn_info package with the wordnet object, 0/1
  # depending on whether stemming is required, and the (flattened) stop hash.
  my $gwi = WordNet::get_wn_info->new($wn, ($self->{stem} ? 1 : 0), %stopHash);
  $self->{gwi} = $gwi;

  # Locate the word vector database: the configured one, or else the first
  # installed default whose header line mentions DOCUMENTCOUNT.
  my $vectorDB = $self->{vectordb};
  if(!defined $vectorDB || $vectorDB eq "")
  {
    $vectorDB = _findDefaultDataFile('wordvectors.dat', 'DOCUMENTCOUNT');
    $self->{vectordb} = $vectorDB if(defined $vectorDB);
  }
  if(!defined $vectorDB || $vectorDB eq "")
  {
    $self->{errorString} .= "\nError (WordNet::Similarity::vector->initialize()) - ";
    $self->{errorString} .= "No usable Word Vector database found. Use configuration file.";
    $self->{error} = 2;
    return;
  }

  # Get the documentCount, dimensions and vectors...
  my ($documentCount, $readDims, $readVectors) = WordNet::vectorFile->readVectors($vectorDB);
  if(!defined $documentCount || !defined $readDims || !defined $readVectors)
  {
    $self->{errorString} .= "\nError (WordNet::Similarity::vector->initialize()) - ";
    $self->{errorString} .= "Error reading the vector database file.";
    $self->{error} = 2;
    return;
  }

  # Load the word vector dimensions: the first whitespace-separated field of
  # each dimension record is used as the index of that word.
  $self->{numberOfDimensions} = scalar(keys(%{$readDims}));
  foreach my $word (keys %{$readDims})
  {
    my @parts = split(/\s+/, $readDims->{$word});
    $self->{wordIndex}->{$word} = $parts[0];
    $self->{indexWord}->[$parts[0]] = $word;
  }

  # Set up the interface to the word vectors (undefined entries are skipped).
  foreach my $word (keys %{$readVectors})
  {
    my $vec = $readVectors->{$word};
    $self->{table}->{$word} = $vec if(defined $vec);
  }

  # If no relation file was specified, look for an installed default one.
  if(!defined $self->{relation})
  {
    my $defaultRelations = _findDefaultDataFile('vector-relation.dat', 'VectorRelationFile');
    $self->{relation} = $defaultRelations if(defined $defaultRelations);
  }

  if(!(defined $self->{relation}))
  {
    # No relation file at all: fall back to the single "glosexample"
    # relation with weight 1.
    $self->{weights}->[0] = 1;
    $self->{functions}->[0]->[0] = "glosexample";
  }
  else
  {
    # The helper has already recorded the error when it returns false.
    return if(!_loadRelationFile($self, $gwi, $self->{relation}));
  }

  $self->{textsize} = -1 if(!defined $self->{textsize});
  return;
}

# Searches every directory in @INC for WordNet/<fileName> and returns the path
# of the first such file that can be opened and whose first line (with all
# whitespace removed) contains $signature.  Returns undef (empty list in list
# context) when no file qualifies.
sub _findDefaultDataFile
{
  my ($fileName, $signature) = @_;

  foreach my $dir (@INC)
  {
    # JM 1-16-04 -- modified to use File::Spec
    my $candidate = File::Spec->catfile($dir, 'WordNet', $fileName);
    next if(!-e $candidate);
    next if(!open(my $handle, '<', $candidate));

    my $header = <$handle>;
    close($handle);

    # An empty file has no header line at all.
    next if(!defined $header);

    $header =~ s/\s+//g;
    return $candidate if($header =~ /\Q$signature\E/);
  }

  return;
}

# Reads the relation file into $self->{functions} (one chain of function
# names per line, innermost function first) and $self->{weights}.  Every
# function name must be a method of $gwi.  Returns 1 on success; on failure
# records an error (level 2) on $self and returns 0.
sub _loadRelationFile
{
  my ($self, $gwi, $relationFile) = @_;

  my $handle;
  if(!open($handle, '<', $relationFile))
  {
    $self->{errorString} .= "\nError (WordNet::Similarity::vector->initialize()) - ";
    $self->{errorString} .= "Unable to open $relationFile.";
    $self->{error} = 2;
    return 0;
  }

  # Common failure path once the file is open.
  my $fail = sub
  {
    my ($message) = @_;
    $self->{errorString} .= "\nError (WordNet::Similarity::vector->initialize()) - ";
    $self->{errorString} .= $message;
    $self->{error} = 2;
    close($handle);
    return 0;
  };

  my $header = <$handle>;
  $header = "" if(!defined $header);
  $header =~ s/\s+//g;
  return $fail->("Bad file format ($relationFile).") if($header !~ /VectorRelationFile/);

  $self->{functions} = [];
  $self->{weights} = [];

  my $index = 0;
  while(my $relation = <$handle>)
  {
    $relation =~ s/[\r\f\n]//g;

    # remove leading/trailing spaces from the relation
    $relation =~ s/^\s+//;
    $relation =~ s/\s+$//;

    # now extract the weight if any. if no weight, assume 1
    if($relation =~ /(\S+)\s+(\S+)/)
    {
      my ($expression, $weight) = ($1, $2);
      $relation = $expression;
      $self->{weights}->[$index] = $weight;
    }
    else
    {
      $self->{weights}->[$index] = 1;
    }

    # "outer(inner(innermost))" becomes ("outer", "inner", "innermost").
    $relation =~ s/[\s\)]//g;
    my @functionArray = split(/\(/, $relation);

    my $innermost = $functionArray[$#functionArray];
    if(!($gwi->can($innermost)))
    {
      return $fail->("Undefined function ($functionArray[$#functionArray]) in relations file.");
    }
    push(@{$self->{functions}->[$index]}, $innermost);

    my $input;
    my $output;
    my $dummy;

    # Walk outwards, checking that each function can consume what the
    # function nested inside it produces.
    # NOTE(review): the calls below presumably make the get_wn_info method
    # report its (input type, output type) when the second argument is 1 --
    # inferred from how the results are compared; confirm against
    # WordNet::get_wn_info.
    for(my $k = $#functionArray - 1; $k >= 0; $k--)
    {
      my $outer = $functionArray[$k];
      my $inner = $functionArray[$k+1];
      if(!($gwi->can($outer)))
      {
        return $fail->("Undefined function ($functionArray[$k]) in relations file.");
      }

      ($input, $dummy) = $gwi->$outer($dummy, 1);
      ($dummy, $output) = $gwi->$inner($dummy, 1);

      if($input != $output)
      {
        return $fail->("Invalid function combination - $functionArray[$k]($functionArray[$k+1]).");
      }

      push(@{$self->{functions}->[$index]}, $outer);
    }

    # if the output of the outermost function is synset array (1)
    # wrap a glosexample around it
    my $outermost = $functionArray[0];
    ($dummy, $output) = $gwi->$outermost($dummy, 1);
    if($output == 1)
    {
      push(@{$self->{functions}->[$index]}, "glosexample");
    }

    $index++;
  }

  close($handle);
  return 1;
}
436
437				=item $vector->traceOptions()
438
439				This method is internally called to determine the extra options
440				specified by this measure (apart from the default options specified
441				in the WordNet::Similarity base class).
442
443				Parameters: none.
444
445				Returns: none.
446
447				=cut
448
449				# show all config options specific to this module
# Appends one line per measure-specific option to $self->{traceString}.
# The three file options are printed with an empty value when they are not
# defined; "stem" and "textsize" are interpolated as they are.
sub traceOptions {
  my ($self) = @_;

  my @fileOptions = (
    [ "relation File :: ", "relation" ],
    [ "vectorDB File :: ", "vectordb" ],
    [ "stop File :: ",     "stop" ],
  );
  foreach my $fileOption (@fileOptions) {
    my ($prefix, $key) = @{$fileOption};
    my $shown = "";
    $shown = "$self->{$key}" if(defined $self->{$key});
    $self->{traceString} .= $prefix.$shown."\n";
  }

  $self->{traceString} .= "stem :: $self->{stem}\n";
  $self->{traceString} .= "textsize :: $self->{textsize}\n";
}
458
459				=item $vector->getRelatedness
460
461				Computes the relatedness of two word senses using the Vector Algorithm.
462
463				Parameters: two word senses in "word#pos#sense" format.
464
465				Returns: Unless a problem occurs, the return value is the relatedness
466				score, which is greater-than or equal-to 0. If an error occurs,
467				then the error level is set to non-zero and an error
468				string is created (see the description of getError()).
469
470				=cut
471
# Computes the relatedness of two word senses as the inner product of their
# normalized gloss vectors.
#
# INPUT PARAMS  : $wps1, $wps2 .. word senses in "word#pos#sense" format.
# RETURN VALUES : the score, or undef when a required object is missing or
#                 the input is unusable; in that case $self->{error} and
#                 $self->{errorString} describe the problem.
sub getRelatedness
{
  my $self = shift;
  my $wps1 = shift;
  my $wps2 = shift;
  my $wn = $self->{wn};
  my $wntools = $self->{wntools};
  my $gwi = $self->{gwi};

  # Check the existence of the WordNet::QueryData object.
  if(!$wn)
  {
    $self->{errorString} .= "\nError (WordNet::Similarity::vector->getRelatedness()) - ";
    $self->{errorString} .= "A WordNet::QueryData object is required.";
    $self->{error} = 2;
    return undef;
  }

  # Check the existence of the WordNet::Tools object.
  if(!$wntools)
  {
    $self->{errorString} .= "\nError (WordNet::Similarity::vector->getRelatedness()) - ";
    $self->{errorString} .= "A WordNet::Tools object is required.";
    $self->{error} = 2;
    return undef;
  }

  # Initialize traces.
  $self->{traceString} = "" if($self->{trace});

  # Undefined input cannot go unpunished.
  if(!$wps1 || !$wps2)
  {
    $self->{errorString} .= "\nWarning (WordNet::Similarity::vector->getRelatedness()) - Undefined input values.";
    $self->{error} = 1 if($self->{error} < 1);
    return undef;
  }

  # Security check -- are the input strings in the correct format (word#pos#sense).
  foreach my $wps ($wps1, $wps2)
  {
    next if($wps =~ /^\S+\#([nvar])\#\d+$/);
    $self->{errorString} .= "\nWarning (WordNet::Similarity::vector->getRelatedness()) - ";
    $self->{errorString} .= "Input not in word\#pos\#sense format.";
    $self->{error} = ($self->{error} < 1) ? 1 : $self->{error};
    return undef;
  }

  # Now check if the similarity value for these two synsets is in
  # fact in the cache... if so return the cached value.
  my $relatedness = $self->{doCache} ? $self->fetchFromCache($wps1, $wps2) : undef;
  defined $relatedness and return $relatedness;

  # Are both gloss vectors present in the vector cache?  Then no gloss
  # strings need to be built at all.
  if(defined $self->{vCache}->{$wps1} && defined $self->{vCache}->{$wps2})
  {
    if($self->{trace})
    {
      $self->{traceString} .= "Synset 1: $wps1 (Gloss Vector found in Cache)\n";
      $self->{traceString} .= "Synset 2: $wps2 (Gloss Vector found in Cache)\n";
    }
    my $cachedScore = _inner($self->{vCache}->{$wps1}, $self->{vCache}->{$wps2});

    # Put in cache if doing cacheing. Then return the score.
    $self->{doCache} and $self->storeToCache($wps1, $wps2, $cachedScore);
    return $cachedScore;
  }

  # Applies one chain of relation functions to a synset.  The synset starts
  # out in a "set" of itself; the output of each function is the input of
  # the next one, and the first element of the final output is the text.
  my $applyChain = sub
  {
    my ($wps, @chain) = @_;
    my @arguments = ({ $wps => 1 });
    foreach my $fn (@chain)
    {
      @arguments = $gwi->$fn(@arguments);
    }
    return $arguments[0];
  };

  # Go through the function chains and collect the text for both synsets.
  my $firstString = "";
  my $secondString = "";
  foreach my $chainRef (@{ $self->{functions} || [] })
  {
    last if(!defined $chainRef);

    # A chain ends at its first undefined entry.
    my @chain;
    foreach my $fn (@{$chainRef})
    {
      last if(!defined $fn);
      push(@chain, $fn);
    }

    $firstString .= $applyChain->($wps1, @chain);
    $secondString .= $applyChain->($wps2, @chain);

    # Report the functions of this chain in the trace.
    if($self->{trace})
    {
      $self->{traceString} .= "Functions: ".join("", map { "$_ " } @chain)."\n";
    }
  }

  # Preprocess: drop apostrophes, turn every run of characters other than
  # a-z and 0-9 into one blank, trim, and let WordNet::Tools mark compounds.
  my $tidy = sub
  {
    my ($text) = @_;
    $text =~ s/\'//g;
    $text =~ s/[^a-z0-9]+/ /g;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $wntools->compoundify($text);
  };
  $firstString = $tidy->($firstString);
  $secondString = $tidy->($secondString);

  # Get vectors... score...
  $self->{traceString} .= "Synset 1: $wps1" if($self->{trace});
  my $vector1 = _glossVectorFor($self, $wps1, $firstString);

  $self->{traceString} .= "Synset 2: $wps2" if($self->{trace});
  my $vector2 = _glossVectorFor($self, $wps2, $secondString);

  my $score = _inner($vector1, $vector2);

  # that does all the scoring. Put in cache if doing cacheing. Then
  # return the score.
  $self->{doCache} and $self->storeToCache($wps1, $wps2, $score);

  return $score;
}

# Returns the normalized gloss vector of $wps.  A vector already present in
# $self->{vCache} is reused; otherwise it is computed from $glossText,
# normalized, stored in the cache, and the oldest cache entries are evicted
# until at most $self->{vCacheSize} remain.  Appends to the trace string when
# tracing is on.
sub _glossVectorFor
{
  my ($self, $wps, $glossText) = @_;

  if(defined $self->{vCache}->{$wps})
  {
    $self->{traceString} .= " (Gloss vector found in cache)\n" if($self->{trace});
    return $self->{vCache}->{$wps};
  }

  my ($vector, $wordTrace, $magnitude) = $self->_getVector($glossText);
  $self->{traceString} .= "\nString: \"$glossText\"\n$wordTrace\n" if($self->{trace});
  _norm($vector, $magnitude);

  $self->{vCache}->{$wps} = $vector;
  push(@{$self->{vCacheQ}}, $wps);
  while(scalar(@{$self->{vCacheQ}}) > $self->{vCacheSize})
  {
    my $evicted = shift(@{$self->{vCacheQ}});
    delete $self->{vCache}->{$evicted};
  }

  return $vector;
}
711
712
713				# Method to compute a context vector from a given body of text...
# Builds a sparse context vector for a body of text by adding up the stored
# word vectors ($self->{table}) of the distinct words in the text, leaving out
# words found in $self->{stopHash}.
#
# INPUT PARAMS  : $text .. whitespace-separated words (may be undef).
# RETURN VALUES : (vector hash ref, trace text, magnitude of the vector,
#                 number of words taken from the text)
sub _getVector
{
  my ($self, $text) = @_;
  my $vector = {};
  return ($vector, "", 0, 0) unless defined $text;

  my $traces = $self->{trace} ? "Word Vectors for: " : "";
  my $limit = $self->{textsize};

  # Collect the distinct words.  Tokens matching the marker pattern below are
  # skipped and not counted (presumably markers inserted while the gloss text
  # is assembled -- TODO confirm against WordNet::get_wn_info).
  # NOTE(review): with a non-negative textsize the loop stops only once the
  # count exceeds it, so textsize + 1 words are taken -- confirm intended.
  my %seen;
  my $taken = 0;
  foreach my $token (split(/\s+/, $text))
  {
    next if($token =~ /[XGES]{3}\d{5}[XGES]{3}/);
    $seen{$token} = 1;
    $taken++;
    last if($limit >= 0 && $taken > $limit);
  }

  # Add up the vectors of all words that have one and are not stop words.
  my @traced;
  foreach my $token (keys %seen)
  {
    next unless defined $self->{table}->{$token};
    next if defined $self->{stopHash}->{$token};

    # A stored vector is a flat "dimension value dimension value ..." string.
    my %entries = split(/\s+/, $self->{table}->{$token});
    push(@traced, "$token") if($self->{trace});

    while(my ($dimension, $value) = each %entries)
    {
      my $soFar = defined($vector->{$dimension}) ? $vector->{$dimension} : 0;
      $vector->{$dimension} = $soFar + $value;
    }
  }
  $traces .= join(", ", @traced);

  # Euclidean length of the summed vector.
  my $sumOfSquares = 0;
  $sumOfSquares += ($_ * $_) foreach values %{$vector};

  return ($vector, $traces, sqrt($sumOfSquares), $taken);
}
775
776				# Normalizes the sparse vector.
# Scales a sparse vector in place by dividing every component by $magnitude.
# Nothing happens when the vector or the magnitude is undefined, or when the
# magnitude is zero.
sub _norm
{
  my ($vector, $magnitude) = @_;

  return unless defined $vector;
  return unless defined $magnitude;
  return if $magnitude == 0;

  # values() yields aliases, so the hash itself is updated.
  $_ /= $magnitude foreach values %{$vector};
  return;
}
791
792				# Inner product of two sparse vectors.
# Inner product of two sparse vectors given as hash references.  Only keys
# whose value is defined in both vectors contribute.  Returns 0 when either
# argument is undefined.
sub _inner
{
  my ($left, $right) = @_;

  return 0 unless(defined $left && defined $right);

  # Walk the keys of the vector with fewer entries (the second one on a tie)
  # and look each key up in the other vector.
  my ($walked, $probed) = (scalar(keys(%{$left})) < scalar(keys(%{$right})))
                        ? ($left, $right)
                        : ($right, $left);

  my $product = 0;
  foreach my $dimension (keys %{$walked})
  {
    next unless defined $probed->{$dimension};
    $product += ($left->{$dimension} * $right->{$dimension});
  }

  return $product;
}
824
825				1;
826
827				__END__