File Coverage

blib/lib/Graph/Similarity/SimilarityFlooding.pm

Criterion	Covered	Total	%
statement	10	12	83.3
branch			n/a
condition			n/a
subroutine	4	4	100.0
pod			n/a
total	14	16	87.5

line	stmt	sub	time	code
1				package Graph::Similarity::SimilarityFlooding;
2
3	1	1	1570	use strict;
	1		2
	1		40
4	1	1	6	use warnings;
	1		2
	1		27
5
6	1	1	1332	use Graph;
	1		154224
	1		39
7	1	1	494	use Moose;
	0
	0
8				use Text::Levenshtein qw(distance);
9
10				our $VERSION = '0.02';
11
12				with 'Graph::Similarity::Method';
13
14				has 'graph' => (is => 'rw', isa => 'ArrayRef[Graph]', required => 1);
15
16				__PACKAGE__->meta->make_immutable;
17				no Moose;
18
19				sub calculate {
20				my $self = shift;
21
22				# Store similarity matrix $sim{vertex1}{vertex2}
23				my %sim;
24				my $itr = $self->num_of_iteration;
25				my $g = $self->graph;
26				my $g1 = $$g[0];
27				my $g2 = $$g[1];
28
29				# Create InitialMap
30				# Similarity is calculated by 1 - (edit distnace(stringA, stringB) / length of the stringA + stringB)
31				# This calcualtion can be changed
32				for my $v1 ($g1->vertices){
33				for my $v2 ($g2->vertices){
34				$sim{$v1}{$v2} = 1 - (distance($v1, $v2) / length("$v1$v2"));
35				#print "$v1-$v2\n";
36				#$sim{$v1}{$v2} = 1;
37				}
38				}
39
40				# Create Pairwise Connectivity Graph
41				my $pcg = Graph->new(multiedged => 1);
42
43				# Frist, collect source, destination node and label
44				# The is for Graph1
45				my %m1;
46				my %labels;
47				for my $v1 ($g1->vertices){
48				for my $p1 ($g1->predecessors($v1)){
49				for my $label ($g1->get_multiedge_ids($p1, $v1)){
50				# {"LABEL"}{"SOURCE NODE"}{"DESTINATION NODE"}
51				$m1{$label}{$p1}{$v1} = 1; # There is no meaing to put 1. Just want to pickup unique key later
52				$labels{$label} = 1;
53				}
54				}
55				}
56				# For Graph2
57				my %m2;
58				my @labels;
59				for my $v2 ($g2->vertices){
60				for my $p2 ($g2->predecessors($v2)){
61				for my $label ($g2->get_multiedge_ids($p2, $v2)){
62				# {"LABEL"}{"SOURCE NODE"}{"DESTINATION NODE"}
63				$m2{$label}{$p2}{$v2} = 1;
64				$labels{$label} = 1;
65				}
66				}
67				}
68
69				# Secondary, add pairwise node.
70				# Node name is src1(from graph1)/src2(from graph2) or dest1(from graph1)/dest2(from graph2)
71				# %edges used for couting the label of neighbors
72				my %edges;
73				for my $label (keys %labels) {
74				#print $label, "------\n";
75				for my $src1 (keys %{$m1{$label}}){
76				for my $src2 (keys %{$m2{$label}}){
77				for my $dest1 (keys %{$m1{$label}{$src1}}){
78				for my $dest2 (keys %{$m2{$label}{$src2}}){
79				#print "src - $src1,$src2\n";
80				#print "dest - $dest1,$dest2\n";
81				$pcg->add_edge_by_id("$src1/$src2", "$dest1/$dest2", $label );
82				$pcg->add_edge_by_id("$dest1/$dest2", "$src1/$src2", $label );
83				$edges{"$src1/$src2"}{$label}++;
84				$edges{"$dest1/$dest2"}{$label}++;
85				}
86				}
87				}
88				}
89				}
90
91
92
93				# Start iteration
94				for (my $i=0; $i<$itr; $i++){
95
96				# Based on label info, create the logic to behave as the same as "Induced Propagation Graph" in the paper
97				my $max=0;
98				my %next_sim;
99				for my $v1 ($g1->vertices){
100				for my $v2 ($g2->vertices){
101
102				my $sum=0;
103				for my $n ($pcg->neighbours("$v1/$v2")){
104				for my $label ($pcg->get_multiedge_ids($n, "$v1/$v2")){
105				#print 1/$edges{$n}{$label};
106				#print " * $n : neighbor of $v1/$v2\n";
107				my ($n1, $n2) = split /\//, $n;
108				$sum += $sim{$n1}{$n2} / $edges{$n}{$label};
109
110				}
111				}
112
113				$next_sim{$v1}{$v2} = $sim{$v1}{$v2} + $sum;
114				if ($max < $next_sim{$v1}{$v2}){
115				$max = $next_sim{$v1}{$v2};
116				}
117				}
118				}
119
120
121				# Normalizing
122				# Deviding the maximum value
123				for my $v1 ($g1->vertices){
124				for my $v2 ($g2->vertices){
125
126				if (defined $next_sim{$v1}{$v2}){
127				$sim{$v1}{$v2} = $next_sim{$v1}{$v2} / $max;
128				}
129				else {
130				$sim{$v1}{$v2} = $sim{$v1}{$v2} / $max;
131				}
132				}
133				}
134
135				}
136
137				$self->_setSimilarity(\%sim);
138				return \%sim;
139				#return 1;
140				}
141
142
143
144				=head1 NAME
145
146				Graph::Similarity::SimilarityFlooding - Similarity Flooding implementation
147
148				=head1 VERSION
149
150				Version 0.02
151
152				=head1 SYNOPSIS
153
154				Please see L<Graph::Similarity>
155
156				=head1 DESCRIPTION
157
158				This is the implementation of the below paper.
159
160				B<Sergey Melnik, Hector Garcia-Molina, Erhard Rahm "Similarity Flooding: A Versatile Graph Matching Algorithm
161				and its Application to Schema Matching">
162
163				=head1 METHODS
164
165				=head2 calculate()
166
167				This calculates Similarity Flooding. The algorithm is not clearly mentioned in the papeer.
168				I made it to code from reading "Figure 3. Example illustrating the Similarity Flooding Algorithm".
169
170				=head1 AUTHOR
171
172				Shohei Kameda, C<< <shoheik at cpan.org> >>
173
174				=head1 LICENSE AND COPYRIGHT
175
176				Copyright 2012 Shohei Kameda.
177
178				This program is free software; you can redistribute it and/or modify it
179				under the terms of either: the GNU General Public License as published
180				by the Free Software Foundation; or the Artistic License.
181
182				See http://dev.perl.org/licenses/ for more information.
183
184
185				=cut
186
187				1; # End of Graph::Similarity::SimilarityFlooding