File Coverage

blib/lib/Graph/Similarity.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Graph::Similarity;
2              
3 1     1   37333 use warnings;
  1         3  
  1         36  
4 1     1   6 use strict;
  1         2  
  1         45  
5              
6 1     1   871 use version;
  1         2508  
  1         6  
7             our $VERSION = qv('0.0.5');
8              
9 1     1   725 use Moose;
  0            
  0            
10              
11             use Graph::Similarity::SimRank;
12             use Graph::Similarity::SimilarityFlooding;
13             use Graph::Similarity::CoupledNodeEdgeScoring;
14              
15             has 'graph' => (is => 'rw', isa => 'ArrayRef[Graph]', required => 1);
16              
17             __PACKAGE__->meta->make_immutable;
18             no Moose;
19              
20             #===========================================================================
21             # Select which algorithm is used. And make sure the proper graph is used.
22             # arg1: algorithm - Please check perldoc for more details
23             #===========================================================================
24             sub use {
25             my ($self, $algo) = @_;
26             my $g = $self->graph;
27              
28             my $method;
29             if ($algo eq "SimRank") {
30              
31             die "This algorithm can only apply to single graph\n" if (scalar @$g != 1);
32             die "The graph needs to be directed graph\n" unless ($$g[0]->is_directed);
33             $method = new Graph::Similarity::SimRank(graph => $$g[0]);
34              
35             }elsif ($algo eq "SimilarityFlooding"){
36              
37             die "This algorithm can only applied to two graph\n" if (scalar @$g != 2);
38             die "The graph needs to be multiedged\n" unless ($$g[0]->is_multiedged && $$g[1]->is_multiedged);
39             die "The graph needs to be directed graph\n" unless ($$g[0]->is_directed && $$g[1]->is_directed);
40             $method = new Graph::Similarity::SimilarityFlooding(graph => $g);
41             }
42              
43             elsif ($algo eq "CoupledNodeEdgeScoring"){
44              
45             die "This algorithm can applied to only two graphs\n" if (scalar @$g != 2);
46             die "The graph needs to be directed graph\n" unless ($$g[0]->is_directed && $$g[1]->is_directed);
47             $method = new Graph::Similarity::CoupledNodeEdgeScoring(graph => $g);
48             }
49             else {
50             die "$algo is not supported\n";
51             }
52              
53             return $method;
54             }
55              
56              
57              
58              
59             1; # Magic true value required at end of module
60             __END__
61              
62             =head1 NAME
63              
64             Graph::Similarity - Calculate similarity of the vertices in graph(s)
65              
66             =head1 VERSION
67              
68             This document describes Graph::Similarity version 0.0.5
69              
70              
71             =head1 SYNOPSIS
72              
73             use Graph;
74             use Graph::Similarity;
75              
76             my $g = Graph->new; # Use Graph module
77             $g->add_vertices("a","b","c","d","e");
78             $g->add_edges(['a', 'b'], ['b', 'c'], ['a', 'd'], ['d', 'e']);
79              
80             # Calculate by SimRank
81             my $s = new Graph::Similarity(graph => [$g]);
82             my $method = $s->use('SimRank');
83             $method->setConstnact(0.8);
84             $method->calculate();
85             $method->showAllSimilarities;
86             $method->getSimilarity("c","e");
87              
88             #===============================================
89             # Or by Coupled Node Edge Scoring
90             my $g1 = Graph->new;
91             $g1->add_vertices("A","B","C");
92             $g1->add_edges(['A', 'B'], ['B','C']);
93              
94             my $g2 = Graph->new;
95             $g2->add_vertices("a","b","c","d","e");
96             $g2->add_edges(['a', 'b'], ['b', 'c'], ['a', 'd'], ['d', 'e']);
97             my $method = $s->use('CoupledNodeEdgeScoring');
98             $method->calculate();
99             $method->showAllSimilarities;
100              
101             #===============================================
102             # Or by Similarity Flooding
103             my $g1 = Graph->new(multiedged => 1);
104             $g1->add_vertices("I","coffee","apple","swim");
105             $g1->add_edge_by_id("I", "coffee", "drink");
106             $g1->add_edge_by_id("I", "swim", "can't");
107             $g1->add_edge_by_id("I", "apple", "eat");
108              
109             my $g2 = Graph->new(multiedged => 1);
110             $g2->add_vertices("she","cake","apple juice","swim");
111             $g2->add_edge_by_id("she", "apple juice", "drink");
112             $g2->add_edge_by_id("she", "swim", "can");
113             $g2->add_edge_by_id("she", "cake", "eat");
114            
115             my $s = new Graph::Similarity(graph => [$g1,$g2]);
116             my $method = $s->use('SimimilarityFlooding');
117             $method->calculate();
118             $method->showAllSimilarities;
119            
120             =head1 DESCRIPTION
121              
122             Graph is composed of vertices and edges (This is often also referred as nodes/edge in network).
123             Graph::Similarity calculate the similarity of the vertices(nodes) by the following algorithms,
124              
125             =head3 SimRank
126              
127             =over 2
128              
129             =item Jeh et al "SimRank: A Measure of Structural-Context Similarity"
130              
131             =back
132              
133             =head3 Coupled Node Edge Scoring
134              
135             =over 2
136              
137             =item Vincent D. Blondel et al "Measure of Similarity between Graph Vertices: Applications to Synonym Extraction and Web Searching"
138              
139             =item Laura Zager "Graph Similarity and Matching"
140              
141             =back
142              
143              
144             =head3 Similarity Flooding
145              
146             =over 2
147              
148             =item Melnik et al. "Similarity Flooding: A Versatile Graph Matching Algorithm and its Application to Schema Matching"
149              
150             =back
151              
152             The algorithm is implemented by referring to the above papers. Each module in implementation layer(Graph::Similarity::<algorithm>) explains briefly about the algorithm.
153             However, if you would like to know the details, please read the original papers.
154              
155              
156             =head1 USAGE
157              
158             =head2 $s = new Graph::Similarity(graph => [$g1, $g2])
159              
160             Constructor. Create instance with L<Graph> argument. SimRank is one Graph, the others need two Graphs for the algorithm.
161              
162             =head2 $method = $s->use($algorithm)
163              
164             $algorithm is either 'SimRank', 'CoupledNodeEdgeScoring' or 'SimilarityFlooding'
165             Return an object of method.
166              
167             This use method verifies Graph feature to see whether it fits to the requirement.
168             If there is no required feature, it dies out.
169             For example, when you specify two Graph in SimRank, it dies because SimRank needs to be calculated from one graph.
170              
171             =head2 $method->calculate()
172              
173             Using the method that is specified by use(), calculate the similarity. This returns a hash reference which is the results of calculation.
174              
175             =head2 $method->setNumOfIteration($num)
176              
177             Set the number of Iteration. The argument should be Integer.
178              
179             =head2 $method->showAllSimilarities()
180              
181             The results to STDOUT.
182              
183             =head2 $method->getSimilairity("X", "Y")
184              
185             The vertex(node) has the name when it's created by Graph Module. Say, if you want to know the similarity between vertex "X" and "Y", use this method.
186              
187             =head1 EXAMPLES
188              
189             =head2 SimRank
190              
191             As an example of SimRank, we use Fig1 in the paper.
192              
193             use Graph;
194             use Graph::Similarity;
195            
196             my $g = Graph->new;
197             $g->add_vertices("Univ","ProfA","StudentA","ProfB","StudentB");
198             $g->add_edges(['Univ', 'ProfA'],
199             ['ProfA', 'StudentA'],
200             ['StudentA', 'Univ'],
201             ['Univ', 'ProfB'],
202             ['ProfB', 'StudentB'],
203             ['StudentB', 'ProfB']);
204            
205             my $s = new Graph::Similarity(graph => [$g]);
206             my $method = $s->use('SimRank');
207             $method->setNumOfIteration(5);
208             $method->setConst(0.8);
209             my $result = $method->calculate();
210             # print Dumper $result
211             $method->showAllSimilarities();
212              
213             The result is as follows. The number is very close to the Fig 1.
214              
215             StudentA - StudentA : 1
216             StudentA - ProfA : 0
217             StudentA - StudentB : 0.33048576
218             StudentA - Univ : 0
219             StudentA - ProfB : 0.04096
220             ProfA - StudentA : 0
221             ProfA - ProfA : 1
222             ProfA - StudentB : 0.1024
223             ProfA - Univ : 0
224             ProfA - ProfB : 0.4131072
225             StudentB - StudentA : 0.33048576
226             StudentB - ProfA : 0.1024
227             StudentB - StudentB : 1
228             StudentB - Univ : 0.032768
229             StudentB - ProfB : 0.08445952
230             Univ - StudentA : 0
231             Univ - ProfA : 0
232             Univ - StudentB : 0.032768
233             Univ - Univ : 1
234             Univ - ProfB : 0.128
235             ProfB - StudentA : 0.04096
236             ProfB - ProfA : 0.4131072
237             ProfB - StudentB : 0.084983808
238             ProfB - Univ : 0.132194304
239             ProfB - ProfB : 1
240              
241             =head2 Similarity Flooding
242              
243             As an example, use Fig 3 in the papaer.
244              
245             use Graph;
246             use Graph::Similarity;
247            
248             my $g1 = Graph->new(multiedged => 1);
249             $g1->add_vertices("a","a1","a2");
250             $g1->add_edge_by_id("a", "a1", "l1");
251             $g1->add_edge_by_id("a", "a2", "l1");
252             $g1->add_edge_by_id("a1", "a2", "l2");
253            
254             my $g2 = Graph->new(multiedged => 1);
255             $g2->add_vertices("b","b1","b2");
256             $g2->add_edge_by_id("b", "b1", "l1");
257             $g2->add_edge_by_id("b", "b2", "l2");
258             $g2->add_edge_by_id("b2", "b1", "l2");
259            
260             my $s = new Graph::Similarity(graph => [$g1,$g2]);
261             my $method = $s->use('SimilarityFlooding');
262             $method->setNumOfIteration(5);
263             my $result = $method->calculate();
264             # print Dumper $result
265             $method->showAllSimilarities();
266              
267             The result is the below. The edit distance is not used in the paper, whereas we use edit distance as initial value.
268             This causes the slight difference.
269              
270             a2 - b : 0.000115041702617199
271             a2 - b1 : 0.917094477998274
272             a2 - b2 : 0.191429393155019
273             a - b : 1
274             a - b1 : 0.000115041702617199
275             a - b2 : 0.000115041702617199
276             a1 - b : 0.191429393155019
277             a1 - b1 : 0.385493960310613
278             a1 - b2 : 0.699762726488352
279              
280             =head2 Coupled Node-Edge Scoring
281              
282             Fig 1.2 in the paper, "Measure of Similarity between Graph Vertices: Applications to Synonym Extraction and Web Searching",
283             as an example.
284              
285             use Graph;
286             use Graph::Similarity;
287            
288             my $g1 = Graph->new();
289             $g1->add_vertices("a1","a2","a3","a4");
290             $g1->add_edges(["a1","a3"],["a1","a2"],["a2","a1"],["a2","a3"],
291             ["a3","a2"],["a4","a1"],["a4","a3"]);
292            
293             my $g2 = Graph->new();
294             $g2->add_vertices("b1","b2","b3","b4","b5","b6");
295             $g2->add_edges(["b1","b3"],["b3","b1"],["b6","b1"],["b6","b3"],
296             ["b3","b6"],["b3","b5"],["b2","b6"],["b2","b4"],
297             ["b1","b4"],["b6","b4"]);
298            
299             my $s = new Graph::Similarity(graph => [$g1,$g2]);
300             my $method = $s->use('CoupledNodeEdgeScoring');
301             $method->setNumOfIteration(50);
302             my $result = $method->calculate();
303             # print Dumper $result
304             $method->showAllSimilarities();
305              
306             The result is,
307              
308             b3 - a2 : 0.311518652195988
309             b3 - a4 : 0.166703492014422
310             b3 - a1 : 0.290390588307599
311             b3 - a3 : 0.282452510821415
312             b6 - a2 : 0.301149501715672
313             b6 - a4 : 0.199935544942559
314             b6 - a1 : 0.30383446637482
315             b6 - a3 : 0.253224302437108
316             b1 - a2 : 0.278635459119205
317             b1 - a4 : 0.128928289895856
318             b1 - a1 : 0.263618445136368
319             b1 - a3 : 0.272302658479426
320             b5 - a2 : 0.0758992640884854
321             b5 - a4 : 0
322             b5 - a1 : 0.0633623885302286
323             b5 - a3 : 0.101837901214133
324             b2 - a2 : 0.126836930942235
325             b2 - a4 : 0.126836930942235
326             b2 - a1 : 0.128617950209435
327             b2 - a3 : 0.0624424971383059
328             b4 - a2 : 0.170129852866194
329             b4 - a4 : 0
330             b4 - a1 : 0.15400277534204
331             b4 - a3 : 0.246229188289944
332              
333              
334              
335             =head1 DIAGNOSTICS
336              
337             You may see the following error messages:
338              
339             =over
340              
341             =item C<This algorithm can only apply to single graph>
342              
343             The algorithm needs to have single graph as argument.
344              
345             =item C<The graph needs to be directed graph>
346              
347             Undirected graph can't be applied to this algorithm.
348              
349             =item C<The graph needs to be multiedged>
350              
351             The algorithm needs to has multiedged graph with Graph->new(multiedged => 1)
352              
353             =back
354              
355             =head1 CONFIGURATION AND ENVIRONMENT
356              
357             Graph::Similarity requires no configuration files or environment variables.
358              
359              
360             =head1 DEPENDENCIES
361              
362             None.
363              
364              
365             =head1 AUTHOR
366              
367             Shohei Kameda C<< <shoheik@cpan.org> >>
368              
369              
370             =head1 LICENCE AND COPYRIGHT
371              
372             Copyright (c) 2012, Shohei Kameda C<< <shoheik@cpan.org> >>. All rights reserved.
373              
374             This module is free software; you can redistribute it and/or
375             modify it under the same terms as Perl itself. See L<perlartistic>.
376              
377              
378             =head1 DISCLAIMER OF WARRANTY
379              
380             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
381             FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
382             OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
383             PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
384             EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
385             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
386             ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
387             YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
388             NECESSARY SERVICING, REPAIR, OR CORRECTION.
389              
390             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
391             WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
392             REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
393             LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
394             OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
395             THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
396             RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
397             FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
398             SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
399             SUCH DAMAGES.