File Coverage

blib/lib/Text/SenseClusters/LabelEvaluation/SimilarityScore.pm

Criterion	Covered	Total	%
statement	34	34	100.0
branch	5	8	62.5
condition			n/a
subroutine	6	6	100.0
pod	0	2	0.0
total	45	50	90.0

line	stmt	bran	sub	pod	time	code
1						#!/usr/bin/perl -w
2
3						# Declaring the Package for the module.
4						package Text::SenseClusters::LabelEvaluation::SimilarityScore;
5
6	5		5		41395	use strict;
	5				11
	5				245
7	5		5		1263	use encoding "utf-8";
	5				19875
	5				49
8
9						# The following two lines will make this module inherit from the Exporter Class.
10						require Exporter;
11						our @ISA = qw(Exporter);
12
13
14						# Using Text Similarity Module.
15						# Reference: http://search.cpan.org/~tpederse
16						# /Text-Similarity-0.08/lib/Text/Similarity.pm
17	5		5		8032	use Text::Similarity::Overlaps;
	5				34112
	5				332
18
19
20						#######################################################################################################################
21
22						=head1 Name
23
24						Text::SenseClusters::LabelEvaluation::SimilarityScore - Module for getting the similarity score between the contents of the two files.
25
26						=head1 SYNOPSIS
27
28						# The following code snippet will show how to use SimilarityScore.
29						package Text::SenseClusters::LabelEvaluation::Test_SimilarityScore;
30
31						# Including the LabelEvaluation Module.
32						use Text::SenseClusters::LabelEvaluation::SimilarityScore;
33
34
35						my $firstString = "IBM::: vice president, million dollars, Wall Street, Deep Blue, ".
36						"International Business, Business Machines, International Machines, ".
37						"United States, Justice Department, personal computers";
38						my $secondString = "vice president, million dollars, Deep Blue, International Business, ".
39						"Business Machines, International Machines, United States, Justice Department";
40
41						my $similarityObject = Text::SenseClusters::LabelEvaluation::SimilarityScore->
42						new($firstString,$secondString, "./stoplist.txt");
43
44						#my $score = $similarityObject->computeOverlappingScores();
45						my ($score, %allScores) = $similarityObject->computeOverlappingScores();
46
47						print "Score:: $score \n";
48						print "Lesk Score :: $allScores{'lesk'} \n";
49						print "Raw Lesk Score :: $allScores{'raw_lesk'} \n";
50						print "precision Score :: $allScores{'precision'} \n";
51						print "recall Score :: $allScores{'recall'} \n";
52						print "F Score :: $allScores{'F'} \n";
53						print "dice Score :: $allScores{'dice'} \n";
54						print "E Score :: $allScores{'E'} \n";
55						print "cosine Score :: $allScores{'cosine'} \n";
56						print "\n\n";
57
58
59						=head1 DESCRIPTION
60
61						This module provides a function that compares two strings and returns
62						their overlapping scores. Please refer to the following for a detailed description
63						of how the similarity score is calculated:
64						http://search.cpan.org/~tpederse/Text-Similarity-0.09/
65
66						=cut
67
68
69						# Hash keys under which new() stores each field of the object.
70						my $clusterData = "ClusterData";
71						my $topicData = "TopicData";
72						my $stopListFileLoc = "StopListLoc";
73						my $verbose = "Verbose";
74
75
76						##########################################################################################
77
78						=head1 Constructor: new()
79
80						This is the constructor which will create object for this class.
81						Reference : http://perldoc.perl.org/perlobj.html
82
83						This constructor takes the following arguments and initializes the object with them:
84
85						1. $clusterData : Datatype: String
86						This variable contains the labels generated by the SenseClusters.
87						2. $topicData : Datatype: String
88						This variable contains the Gold standard key's data.
89						3. $stopListFileLoc : Datatype: String
90						This variable contains the user defined location for the stop list file.
91						4. $verbose : Datatype: integer
92						This variable tells whether to display all types of similarity scores or not.
93
94						=cut
95
96						##########################################################################################
97						sub new {
98
99						# The first argument is the class name; the object is a plain hash reference.
100	73		73	0	263	my $class = shift;
101	73				188	my $scoreObject = {};
102
103						# bless associates the hash with $class so that methods can be called on it.
104	73				615	bless $scoreObject, $class;
105
106						# Next argument: the cluster data (stored as given, without validation).
107	73				433	$scoreObject->{$clusterData} = shift;
108
109						# Next argument: the topic data that the cluster data is compared against.
110	73				263	$scoreObject->{$topicData} = shift;
111
112						# Next argument: stop list file location; computeOverlappingScores() picks a default when it is undef.
113	73				241	$scoreObject->{$stopListFileLoc} = shift;
114
115						# Last argument: verbose option, later passed to Text::Similarity::Overlaps as 'verbose'.
116	73				230	$scoreObject->{$verbose} = shift;
117
118						# Return the blessed hash reference.
119	73				255	return $scoreObject;
120						}
121
122
123						########################################################################################
124						=head1 Function: computeOverlappingScores
125
126						Method that compares the cluster labels with the topic data held by
127						the object and returns the overlapping score.
128
129						@argument : None besides the object itself. The cluster labels, the
130						topic data (e.g. text taken from Wikipedia), the stop list location and
131						the verbose option are the values that were passed to new().
132
133						@return : The overlapping score; in list context, the score followed by a hash of all the scores.
134
135						@description :
136						1). Reading the two strings and the options stored in the object.
137						2). Invoking the Text::Similarity::Overlaps module and passing
138						the two strings for similarity comparison.
139						3). The overlapping scores obtained from this module are then returned
140						as the similarity value.
141
142						=cut
143
144						#########################################################################################
145
146						sub computeOverlappingScores{
147
148						# Invoked as a method: the first argument is the SimilarityScore object.
149	73		73	0	430	my $readFileObject = shift;
150
151						# Cluster data stored by new(); used as the first comparison string.
152	73				229	my $firstString = $readFileObject->{$clusterData};
153
154						# Topic data stored by new(); used as the second comparison string.
155	73				184	my $secondString = $readFileObject->{$topicData};
156
157						# Stop list file location stored by new(); undef when none was supplied.
158	73				166	my $stopListFileLocation = $readFileObject->{$stopListFileLoc};
159
160						# Verbose option stored by new().
161	73				156	my $verboseOption = $readFileObject->{$verbose};
162
163	73	100			292	if(!defined $stopListFileLocation){
164						# No stop list given: derive a default path from where this module was loaded. Key of this module in %INC:
165	72				152	my $module = "Text/SenseClusters/LabelEvaluation/SimilarityScore.pm";
166
167						# Path this module was loaded from, as recorded in %INC.
168	72				290	my $moduleInstalledLocation = $INC{$module};
169
170						# Capture everything before the module's relative path into $1 (the capture keeps any
171						# trailing '/'). NOTE(review): the match result is not checked before $1 is used below.
172	72				902	$moduleInstalledLocation =~
173						m/(.*)Text\/SenseClusters\/LabelEvaluation\/SimilarityScore\.pm$/g;
174
175						# Build the stoplist.txt path from the captured prefix. When $1 ends in '/',
176						# the result contains '//' before Text, for e.g.:
177						# /usr/local/share/perl/5.10.1//Text/SenseClusters
178						# /LabelEvaluation/stoplist.txt
179	72				493	$stopListFileLocation
180						= $1."/Text/SenseClusters/LabelEvaluation/stoplist.txt";
181						}
182
183						# Options handed to the Text::Similarity::Overlaps constructor: the verbose
184						# option and the stop list file location.
185	73				419	my %options = ('verbose' => $verboseOption, 'stoplist' => $stopListFileLocation);
186
187						# Create the Overlaps object with those options.
188	73				468	my $mod = Text::Similarity::Overlaps->new (\%options);
189
190						# Abort with an error message if the constructor returned an undefined value.
191	73	50			227224	defined $mod or die "Construction of Text::Similarity::Overlaps failed";
192
193						# First returned value is taken as the score; the remaining values fill %allScores.
194	73				420	my ($score, %allScores)= $mod->getSimilarityStrings ($firstString, $secondString);
195
196
197						# Debugging output for the similarity score (disabled).
198						#print "The similarity of $firstString and $secondString is : $score\n";
199						#print "The similarity of $firstString and $secondString is : $allScores{'lesk'}\n";
200
201						# List context: ($score, %allScores); otherwise only $score. Reference : http://perldoc.perl.org/functions/wantarray.html
202	73	50			21859292	return wantarray ? ($score, %allScores) : $score;
203						}
204
205
206						sub DESTROY {    # Destructor: closes the object's 'handle' entry when one is set.
207	73		73		1650	my $self = shift;
208	73	50			794	$self->{handle}->close() if $self->{handle};    # NOTE(review): new() never assigns 'handle' - confirm this is still needed.
209						}
210
211						#######################################################################################################
212						=pod
213
214
215						=head1 SEE ALSO
216
217						http://senseclusters.cvs.sourceforge.net/viewvc/senseclusters/LabelEvaluation/
218
219
220						Last modified by :
221						$Id: SimilarityScore.pm,v 1.6 2013/03/18 00:47:24 jhaxx030 Exp $
222
223
224						=head1 AUTHORS
225
226						Anand Jha, University of Minnesota, Duluth
227						jhaxx030 at d.umn.edu
228
229						Ted Pedersen, University of Minnesota, Duluth
230						tpederse at d.umn.edu
231
232						=head1 COPYRIGHT AND LICENSE
233
234						Copyright (C) 2012 Ted Pedersen, Anand Jha
235
236						See http://dev.perl.org/licenses/ for more information.
237
238						This program is free software; you can redistribute it and/or modify
239						it under the terms of the GNU General Public License as published by
240						the Free Software Foundation; either version 2 of the License, or
241						(at your option) any later version.
242
243						This program is distributed in the hope that it will be useful,
244						but WITHOUT ANY WARRANTY; without even the implied warranty of
245						MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
246						GNU General Public License for more details.
247
248						You should have received a copy of the GNU General Public License
249						along with this program; if not, write to:
250
251
252						The Free Software Foundation, Inc., 59 Temple Place, Suite 330,
253						Boston, MA 02111-1307 USA
254
255
256						=cut
257						#######################################################################################################
258
259
260						# Making the default return statement as 1;
261						# Reference : http://lists.netisland.net/archives/phlpm/phlpm-2001/msg00426.html
262						1;