File Coverage

blib/lib/Text/SenseClusters/LabelEvaluation/AssigningLabelUsingHungarianAlgo.pm
Criterion Covered Total %
statement 82 89 92.1
branch 11 18 61.1
condition n/a
subroutine 5 5 100.0
pod 0 4 0.0
total 98 116 84.4


line stmt bran cond sub pod time code
1             # Defining the Package for the modules.
2             package Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo;
3              
4 5     5   43537 use Algorithm::Munkres;
  5         22702  
  5         6477  
5              
6              
7             # Defining the class variables.
8             my $matrixToArrangeRef= "matrixRef";
9             my $columnHeaderRef = "colHeaderRef";
10             my $rowHeaderRef = "rowHeaderRef";
11              
12             my $INFINTE_NUMBER = 999999999;
13              
14             #######################################################################################################################
15              
16             =head1 Name
17              
18             Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo - Module which uses Hungarian Algorithm for assigning labels to the clusters.
19              
20             =head1 SYNOPSIS
21              
22             The following code snippet will show how to use this module.
23              
24             # Including the AssigningLabelUsingHungarianAlgo Module.
25             use Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo;
26            
27             # Defining the matrix which contains the similarity scores for labels and clusters.
28             my @mat = ( [ 2, 4, 7 ], [ 3, 9, 5 ], [ 8, 2, 9 ], );
29              
30             # Defining the header for these matrix.
31             my @topicHeader = ("BillClinton", "TonyBlair", "EhudBarak");
32             my @clusterHeader = ("Cluster0", "Cluster1", "Cluster2");
33            
34             # Uncomment these to test unbalanced scenarios where number of cluster and labels are different.
35             # Test Case 2:
36             #my @mat = ( [ 7, 1, 6, 8, 4 ], [ 8, 6, 5, 9, 8 ], [ 7, 6, 5, 8, 2 ], );
37             #my @topicHeader = ("BillClinton", "TonyBlair", "EhudBarak", "SaddamHussien", "VladmirPutin");
38             #my @clusterHeader = ("Cluster0", "Cluster1", "Cluster2");
39            
40             # Test Case 3:
41             #my @mat = ( [ 7, 1, 6 ], [ 8, 6, 5 ], [ 7, 6, 5 ], [ 8, 9, 8 ], [ 1, 0, 1 ]);
42             #my @topicHeader = ("BillClinton", "TonyBlair", "SaddamHussien");
43             #my @clusterHeader = ("Cluster0", "Cluster1", "Cluster2", "Cluster3", "Cluster4");
44              
45              
46             # Creating the Hungarian object.
47             my $hungarainObject = Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo
48             ->new(\@mat, \@topicHeader, \@clusterHeader);
49              
50             # Assigning the labels to clusters using Hungarian algorithm.
51             my $accuracy = $hungarainObject->reAssigningWithHungarianAlgo();
52              
53             # Assigning the labels to clusters using Hungarian algorithm. In this case,
54             # user will get new matrix which contains the mapping between clusters and labels.
55             #my ($accuracy,$finalMatrixRef,$newColumnHeaderRef) =
56             # $hungarainObject->reAssigningWithHungarianAlgo();
57              
58             # Following function will just print matrix for you.
59             #Text::SenseClusters::LabelEvaluation::AssigningLabelUsingHungarianAlgo::printMatrix
60             # ($finalMatrixRef, $newColumnHeaderRef, \@clusterHeader);
61              
62             print "\n\nAccuracy of labels is $accuracy. ";
63             print "\n";
64              
65              
66             =head1 DESCRIPTION
67            
68             This module assign labels for the clusters using the hungarian algorithm.
69              
70             Please refer the following for detailed explaination of hungarian algorithm:
71             http://search.cpan.org/~tpederse/Algorithm-Munkres-0.08/lib/Algorithm/Munkres.pm
72              
73             =cut
74             ##########################################################################################
75              
76              
77              
78              
79              
80             ##########################################################################################
81              
82             =head1 Constructor: new()
83              
84             This is the constructor which will create object for this class.
85             Reference : http://perldoc.perl.org/perlobj.html
86              
87             This constructor takes these argument and intialize it for the class:
88             1. Matrix :
89             This is the two dimensional array, containing the similarity
90             score. We will take the inverse of these scores for hungarian
91             algorithm. As the Hungarian algorithm, uses the minimum scores
92             in assignment(as diagonal score) while we need the maximum scores
93             for the assignment.
94            
95             2. Column Header:
96             This is 1D array, which contains the header information for each
97             Column.
98            
99             2. Row Header:
100             This is 1D array, which contains the header information for each
101             Row.
102            
103             =cut
104              
105             ##########################################################################################
106             sub new {
107             # Creating the object.
108 3     3 0 40 my $class = shift;
109 3         9 my $hungrarianObject = {};
110              
111             # Explicit association is created by the built-in bless function.
112 3         14 bless $hungrarianObject, $class;
113              
114             # Getting the Reference of Matrix-to-print as the argument.
115 3         7 my $matRef = shift;
116             # Getting the matrix from the reference.
117 3         36 $hungrarianObject->{$matrixToArrangeRef} = $matRef;
118            
119             # Getting the Reference of Column-Header matrix as the argument.
120 3         10 my $columnHeadersRef = shift;
121             # Getting the matrix from the reference.
122 3         10 $hungrarianObject->{$columnHeaderRef} = $columnHeadersRef;
123              
124             # Getting the Reference of Column-Header matrix as the argument.
125 3         7 my $rowHeadersRef = shift;
126             # Getting the matrix from the reference.
127 3         11 $hungrarianObject->{$rowHeaderRef} = $rowHeadersRef;
128            
129             # Returning the blessed hash refered by $self.
130 3         13 return $hungrarianObject;
131             }
132              
133              
134             ##########################################################################################
135             =head1 function: reAssigningWithHungarianAlgo
136              
137             This method will assign the labels to each cluster using the Hugarian Algorithm.
138             While assigning the labels it will consider the similarity score of these labels
139             with the gold standard keys.
140            
141             @argument : $hungrarianObject DataType(Reference of the object of this class)
142            
143             @return : $accuracy : DataType(Float)
144             Indicates the overall accuracy of the assignments.
145              
146             OR
147            
148             @return : $accuracy : DataType(Float)
149             Indicates the overall accuracy of the assignments.
150             \@final : DataType(Reference of 2-D Array.)
151             Reference of two dimensional array whose diagonal values contains
152             the similarity score for clusters labels and gold standard keys.
153             \@newColumnHeader: DataType(Reference of 1-D Array.)
154             Reference to new order of the column headers which corresponds
155             to changed diagonal elements.
156            
157             @description :
158             1). It will read the Matrix contianing the similarity score of each cluster
159             labels and gold keys data.
160             2). It will than call a function which will inverse the similarity scores.
161             3). Then, it will call the 'assign' function from the "Algorithm::Munkres" with
162             this similarity scores.
163             4). It will calculate the accuracy for the assignment as
164            
165             Sum (Diagonal Scores)
166             Accuracy = -------------------------
167             Sum (All the Scores)
168             5). Finally, the new arrangement is used to determine the new headers for
169             each column.
170            
171             =cut
172             ##########################################################################################
173             sub reAssigningWithHungarianAlgo{
174            
175             # Getting the Reference of Matrix-to-print as the argument.
176 3     3 0 12 my $hungrarianObject = shift;
177            
178             # Getting the matrix-to-rearranged from the class object.
179 3         9 my $matRef = $hungrarianObject->{$matrixToArrangeRef};
180 3         8 my @mat = @$matRef;
181            
182             # Getting the Column-Header-Matrix as Array from the class object.
183 3         9 my $columnHeaderRefer = $hungrarianObject->{$columnHeaderRef};
184 3         9 my @columnHeaderArray = @$columnHeaderRefer;
185            
186             # Getting the Row-Header matrix as Array from the class object.
187 3         8 my $rowHeaderRefer = $hungrarianObject->{$rowHeaderRef};
188 3         12 my @rowHeaderArray = @$rowHeaderRefer;
189              
190             # Variable to store the total count of the matrix.
191 3         6 my $totalMatrixCount = 0;
192            
193             # Variable to store the total diagonal count of the matrix.
194 3         7 my $totalDiagonalCount = 0;
195            
196             # Variable used to storing the final matrix.
197 3         7 my @final;
198            
199             # Variable used for iteration of the matrix.
200 3         8 my $rowIndex = 0;
201            
202 3         475 print STDERR "\nOriginal Contigency Matrix: \n ";
203 3         25 printMatrix(\@mat,\@columnHeaderArray,\@rowHeaderArray);
204            
205 3         20 my $inversedMatrixRef = inverseMatrixCellValue(\@mat);
206 3         11 my @inversedMatrix = @$inversedMatrixRef;
207            
208             # Calling the "Algorithm::Munkres" to calculate the assignment.
209 3         23 assign( \@inversedMatrix, \@out_mat );
210              
211              
212             # Rearranging the original matrix to get the new matrix.
213 3         1828 foreach $row (0..@out_mat-1){
214 9         24 foreach $col (0..@out_mat-1){
215 27 50       67 if($mat[$row][$out_mat[$col]]){
216 27         91 $final[$row][$col]=$mat[$row][$out_mat[$col]];
217             }else{
218 0         0 $final[$row][$col]= 0;
219             }
220             # Getting the diagonal Count.
221 27 100       63 if($row == $col){
222 9         16 $totalDiagonalCount = $totalDiagonalCount + $final[$row][$col];
223             }
224             # Getting the total Count of the matrix.
225 27         55 $totalMatrixCount = $totalMatrixCount + $final[$row][$col];
226             }
227             }
228              
229            
230             # This array will hold the rearranged column information.
231 3         9 my @newColumnHeader = ();
232 3         20 my $newColIndex=0;
233              
234             # Getting the new rearranged Column header.
235 3         11 foreach $col (0..@out_mat-1){
236 9 50       25 if($columnHeaderArray[$out_mat[$col]]){
237 9         23 $newColumnHeader[$newColIndex++] = $columnHeaderArray[$out_mat[$col]];
238             }else{
239 0         0 $newColumnHeader[$newColIndex++] = "Unknown";
240             }
241             }
242            
243 3         220 print STDERR " \n\n\nContigency Matrix after Hungarian Algorithm: \n ";
244 3         78 printMatrix(\@final, \@newColumnHeader,\@rowHeaderArray);
245 3         151 print STDERR "\n\n\nFinal Conclusion using Hungarian Algorithm::";
246 3         9 $rowIndex = 0;
247 3         9 foreach my $colValue (@newColumnHeader){
248 9 50       28 if($rowHeaderArray[$rowIndex]){
249 9         527 print STDERR "\n\t$rowHeaderArray[$rowIndex]\t<-->\t$colValue";
250             }else{
251 0         0 print STDERR "\n\tUnknown\t\t<-->\t$colValue";
252             }
253 9         23 $rowIndex++;
254             }
255            
256 3         169 print STDERR "\n\n";
257            
258 3         9 my $accuracy = 0;
259            
260             # Calculating the total accuracy of the assignment.
261 3 50       25 if($totalMatrixCount !=0 ){
262 3         12 $accuracy = ($totalDiagonalCount / $totalMatrixCount);
263             }
264            
265             #print STDERR "\n\nAccuracy of labels is $accuracy-->$totalDiagonalCount-->$totalMatrixCount-->\n\n\n";
266             # Reference : http://perldoc.perl.org/functions/wantarray.html
267 3 100       34 return wantarray ? ($accuracy,\@final,\@newColumnHeader) : $accuracy;
268             }
269              
270              
271             ##########################################################################################
272             =head1 function: inverseMatrixCellValue
273              
274             Method will inverse the value of the cell of the input matrix.
275            
276             @argument : $matRef : DataType(Reference of the 2-D Matrix)
277             This is 2-D array containing the integeral values which will be
278             inversed.
279            
280             @return : $inverseMatrixRef : DataType(Reference of the 2-D Matrix)
281             This is 2-D array containing the inversed values for the input
282             2-D array.
283            
284             @description :
285             1). For the input 2-D array containing the array, each value is inversed
286             and store in the new 2-D array
287            
288             1
289             New-value = -------------------
290             Original-Value
291            
292             2). If the Original-Value = 0, New-value = 0.
293            
294             =cut
295             ##########################################################################################
296             sub inverseMatrixCellValue{
297             # Getting the Reference of Matrix as the argument.
298 3     3 0 9 my $matRef = shift;
299             # Getting the matrix from the reference.
300 3         11 my @mat = @$matRef;
301             # Defining the matix which will contains the inverse values of the original matrix.
302 3         7 my @inversedMatrix = ();
303            
304 3         13 foreach $row (0..@mat-1){
305 9         15 foreach $column (0..@{$mat[$row]}-1){
  9         23  
306             # If the matrix is zero, than do not divide it by zero.
307 27 50       66 if($mat[$row][$column]==0){
308 0         0 $inverseMatrix[$row][$column] = $INFINTE_NUMBER;
309 0         0 next;
310             }
311 27         96 $inverseMatrix[$row][$column] = 1/$mat[$row][$column] ;
312             }
313             }
314             # Returning the inversed matrix.
315 3         14 return \@inverseMatrix;
316             }
317              
318              
319             ##########################################################################################
320             =head1 function: printMatrix
321              
322             Method will print the content of 2-D array in the matrix format.
323            
324             @argument1 : $matRef : DataType(Reference of the 2-D Array)
325             This is 2-D array which has to be printed in the matrix format.
326             @argument2 : $colHeaderRef : DataType(Reference of the 1-D array)
327             Reference to array containing header info for columns
328             @argument3 : $rowHeaderRef : DataType(Reference of the 1-D array)
329             Reference to array containing header info for rows.
330            
331             @description :
332             1. Method for printing the matrix. If user provide his/her own headers
333             then this method will use it, otherwise this method will present
334             default headers.
335            
336             =cut
337             ##########################################################################################
338              
339             sub printMatrix{
340            
341             # Getting the Reference of Matrix-to-print as the argument.
342 8     8 0 24 my $matrixToPrintRef = shift;
343             # Getting the matrix from the reference.
344 8         27 my @matrixToPrint = @$matrixToPrintRef;
345              
346             # Getting the Reference of Column-Header matrix as the argument.
347 8         16 my $columnHeaderRef = shift;
348             # Getting the matrix from the reference.
349 8         22 my @columnHeader = @$columnHeaderRef;
350              
351             # Getting the Reference of Column-Header matrix as the argument.
352 8         13 my $rowHeaderRef = shift;
353             # Getting the matrix from the reference.
354 8         30 my @rowHeader = @$rowHeaderRef;
355              
356             # Defining the row index.
357 8         17 my $rowIndex = 0;
358            
359             # Printing the Column Header. If user provide the column header, then use it
360             # otherwise use the default one.
361 8 50       29 if(@columnHeader){
362 8         585 print STDERR "\n";
363 8         27 foreach my $colIndex (@columnHeader){
364 24         1263 print STDERR "\t$colIndex";
365             }
366             }else{
367 0         0 print STDERR "\tColumn1\tColumn2\tColumn3";
368             }
369            
370             # Printing the Content of the Matrix.
371 8         430 print STDERR "\n-------------------------------------------------";
372 8         33 foreach $row (0..@matrixToPrint-1){
373             # If user provide its own row header then use it, otherwise print default header.
374 24 50       87 if($rowHeader[$rowIndex]){
375 24         1290 print STDERR "\n ".$rowHeader[$rowIndex++];
376             }else{
377 0         0 print STDERR "\n Row".++$rowIndex."\t";
378             }
379             # Printing the cell of the matrix.
380 24         42 foreach $column (0..@{$matrixToPrint[$row]}-1){
  24         77  
381 72         3367 print STDERR "\t$matrixToPrint[$row][$column]";
382             }
383 24         1247 print STDERR "\n-------------------------------------------------";
384             }
385             }
386            
387             1;
388              
389              
390             #######################################################################################################
391             =pod
392              
393             =head1 SEE ALSO
394              
395             http://senseclusters.cvs.sourceforge.net/viewvc/senseclusters/LabelEvaluation/
396            
397             Last modified by :
398             $Id: AssigningLabelUsingHungarianAlgo.pm,v 1.5 2013/03/07 23:19:41 jhaxx030 Exp $
399              
400             =head1 AUTHORS
401              
402             Anand Jha, University of Minnesota, Duluth
403             jhaxx030 at d.umn.edu
404              
405             Ted Pedersen, University of Minnesota, Duluth
406             tpederse at d.umn.edu
407              
408              
409             =head1 COPYRIGHT AND LICENSE
410              
411             Copyright (C) 2012,2013 Ted Pedersen, Anand Jha
412              
413             See http://dev.perl.org/licenses/ for more information.
414              
415             This program is free software; you can redistribute it and/or modify
416             it under the terms of the GNU General Public License as published by
417             the Free Software Foundation; either version 2 of the License, or
418             (at your option) any later version.
419              
420             This program is distributed in the hope that it will be useful,
421             but WITHOUT ANY WARRANTY; without even the implied warranty of
422             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
423             GNU General Public License for more details.
424              
425             You should have received a copy of the GNU General Public License
426             along with this program; if not, write to:
427            
428            
429             The Free Software Foundation, Inc., 59 Temple Place, Suite 330,
430             Boston, MA 02111-1307 USA
431            
432            
433             =cut
434             #######################################################################################################