| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# RDF::Query::BGPOptimizer |
|
2
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
RDF::Query::BGPOptimizer - Optimizer for ordering the joins of triple patterns in a BGP |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 VERSION |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
This document describes RDF::Query::BGPOptimizer version 2.915_01. |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 STATUS |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
This module's API and functionality should be considered unstable. |
|
15
|
|
|
|
|
|
|
In the future, this module may change in backwards-incompatible ways, |
|
16
|
|
|
|
|
|
|
or be removed entirely. If you need functionality that this module provides, |
|
17
|
|
|
|
|
|
|
please L<get in touch|http://www.perlrdf.org/>. |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 METHODS |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=over 4 |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=cut |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
package RDF::Query::BGPOptimizer; |
|
26
|
|
|
|
|
|
|
|
|
27
|
35
|
|
|
35
|
|
183
|
use strict; |
|
|
35
|
|
|
|
|
76
|
|
|
|
35
|
|
|
|
|
910
|
|
|
28
|
35
|
|
|
35
|
|
180
|
use warnings; |
|
|
35
|
|
|
|
|
80
|
|
|
|
35
|
|
|
|
|
820
|
|
|
29
|
35
|
|
|
35
|
|
197
|
use Data::Dumper; |
|
|
35
|
|
|
|
|
72
|
|
|
|
35
|
|
|
|
|
1642
|
|
|
30
|
35
|
|
|
35
|
|
186
|
use List::Util qw(reduce); |
|
|
35
|
|
|
|
|
84
|
|
|
|
35
|
|
|
|
|
1934
|
|
|
31
|
35
|
|
|
35
|
|
184
|
use Scalar::Util qw(blessed reftype refaddr); |
|
|
35
|
|
|
|
|
96
|
|
|
|
35
|
|
|
|
|
2092
|
|
|
32
|
35
|
|
|
35
|
|
189
|
use RDF::Query::Error qw(:try); |
|
|
35
|
|
|
|
|
89
|
|
|
|
35
|
|
|
|
|
255
|
|
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
###################################################################### |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our ($VERSION); |
|
37
|
|
|
|
|
|
|
BEGIN { |
|
38
|
35
|
|
|
35
|
|
36187
|
$VERSION = '2.915_01'; |
|
39
|
|
|
|
|
|
|
} |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
###################################################################### |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=item C<< ordered_triples ( $context, @triples ) >> |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Returns a list of triples, ordered so as to optimize a left-deep join plan based |
|
46
|
|
|
|
|
|
|
on the frequency counts provided by the underlying model. |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=cut |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
sub ordered_triples { |
|
51
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
52
|
0
|
|
|
|
|
|
my $context = shift; |
|
53
|
0
|
|
|
|
|
|
my @triples = @_; |
|
54
|
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
my $model = $context->model; |
|
56
|
|
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
my %vars; |
|
58
|
|
|
|
|
|
|
my %seen; |
|
59
|
|
|
|
|
|
|
my @weighted = map { |
|
60
|
0
|
|
|
|
|
|
my $triple = RDF::Query::Plan::Triple->new( $_->nodes ); |
|
|
0
|
|
|
|
|
|
|
|
61
|
0
|
|
|
|
|
|
[ $_, $self->_cost( $triple, $context ) ] |
|
62
|
|
|
|
|
|
|
} @triples; |
|
63
|
0
|
|
|
|
|
|
my %triples = map { refaddr($_->[0]) => $_ } @weighted; |
|
|
0
|
|
|
|
|
|
|
|
64
|
0
|
|
|
|
|
|
my @ordered = sort { $a->[1] <=> $b->[1] } @weighted; |
|
|
0
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
|
66
|
0
|
|
|
|
|
|
foreach my $t (@triples) { |
|
67
|
0
|
|
|
|
|
|
my @vars = $self->_triple_vars( $t ); |
|
68
|
0
|
|
|
|
|
|
foreach my $name (@vars) { |
|
69
|
0
|
0
|
|
|
|
|
push( @{ $vars{ $name } }, $t ) unless ($seen{ $name }{ refaddr($t) }++); |
|
|
0
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
my %edges; |
|
74
|
0
|
|
|
|
|
|
foreach my $name (keys %vars) { |
|
75
|
0
|
|
|
|
|
|
my @triples = @{ $vars{ $name } }; |
|
|
0
|
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
foreach my $t (@triples) { |
|
77
|
0
|
|
|
|
|
|
my $ta = refaddr($t); |
|
78
|
0
|
|
|
|
|
|
foreach my $u (@triples) { |
|
79
|
0
|
|
|
|
|
|
my $ua = refaddr($u); |
|
80
|
0
|
0
|
|
|
|
|
next if ($ta == $ua); |
|
81
|
0
|
|
|
|
|
|
$edges{ $ta }{ $ua } = $u; |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
my @final; |
|
88
|
|
|
|
|
|
|
my %used; |
|
89
|
0
|
|
|
|
|
|
my $start = shift(@ordered); |
|
90
|
0
|
|
|
|
|
|
$used{ refaddr($start) }++; |
|
91
|
0
|
|
|
|
|
|
push(@final, $start); |
|
92
|
|
|
|
|
|
|
|
|
93
|
0
|
|
|
|
|
|
my @seen = refaddr($start->[0]); |
|
94
|
0
|
|
|
|
|
|
my $count = 0; |
|
95
|
0
|
|
|
|
|
|
while (@ordered) { |
|
96
|
0
|
0
|
|
|
|
|
if (++$count > scalar(@triples)) { |
|
97
|
0
|
|
|
|
|
|
die "loop in BGPOptimizer (?)"; |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
my @frontier = grep { not($used{refaddr($_)}) } map { $triples{ $_ } } map { keys(%{ $edges{ $_ } }) } @seen; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
101
|
0
|
|
|
|
|
|
my @orderedf = sort { $a->[1] <=> $b->[1] } @frontier; |
|
|
0
|
|
|
|
|
|
|
|
102
|
0
|
0
|
|
|
|
|
if (@orderedf) { |
|
103
|
0
|
|
|
|
|
|
my $next = shift(@orderedf); |
|
104
|
0
|
|
|
|
|
|
my $addr = refaddr($next); |
|
105
|
0
|
|
|
|
|
|
$used{ $addr }++; |
|
106
|
0
|
|
|
|
|
|
push(@final, $next); |
|
107
|
0
|
|
|
|
|
|
push(@seen, refaddr($next->[0])); |
|
108
|
0
|
|
|
|
|
|
@ordered = grep { refaddr($_) != $addr } @ordered; |
|
|
0
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
} else { |
|
110
|
0
|
|
|
|
|
|
my $next = shift(@ordered); |
|
111
|
0
|
|
|
|
|
|
my $addr = refaddr($next); |
|
112
|
0
|
|
|
|
|
|
$used{ $addr }++; |
|
113
|
0
|
|
|
|
|
|
push(@final, $next); |
|
114
|
0
|
|
|
|
|
|
push(@seen, refaddr($next->[0])); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
return map { $_->[0] } @final; |
|
|
0
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub _cost { |
|
122
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
123
|
0
|
|
|
|
|
|
my $pattern = shift; |
|
124
|
0
|
|
|
|
|
|
my $context = shift; |
|
125
|
0
|
|
|
|
|
|
my $l = Log::Log4perl->get_logger("rdf.query.bgpoptimizer"); |
|
126
|
0
|
|
|
|
|
|
my $bf = $pattern->bf( $context ); |
|
127
|
0
|
|
|
|
|
|
my $f = ($bf =~ tr/f//); |
|
128
|
0
|
|
|
|
|
|
my $r = $f / 3; |
|
129
|
0
|
|
|
|
|
|
$l->debug( "Pattern has bf representation '$bf'" ); |
|
130
|
0
|
|
|
|
|
|
$l->debug( "There are $f of 3 free variables" ); |
|
131
|
0
|
|
|
|
|
|
$l->debug( 'Estimated cardinality of triple is : ' . $r ); |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# round the cardinality to an integer |
|
134
|
0
|
|
|
|
|
|
return int($r + .5 * ($r <=> 0)); |
|
135
|
|
|
|
|
|
|
} |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub _triple_vars { |
|
138
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
139
|
0
|
|
|
|
|
|
my $t = shift; |
|
140
|
0
|
|
|
|
|
|
my @nodes = $t->nodes; |
|
141
|
0
|
|
|
|
|
|
my (@vars, %seen); |
|
142
|
0
|
|
|
|
|
|
foreach my $n (@nodes) { |
|
143
|
0
|
0
|
|
|
|
|
if ($n->isa('RDF::Trine::Node::Variable')) { |
|
144
|
0
|
|
|
|
|
|
my $name = $n->name; |
|
145
|
0
|
0
|
|
|
|
|
push(@vars, $name) unless ($seen{ $name }++); |
|
146
|
|
|
|
|
|
|
} |
|
147
|
|
|
|
|
|
|
} |
|
148
|
0
|
|
|
|
|
|
return @vars; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
1; |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
__END__ |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=back |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 AUTHOR |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Gregory Todd Williams <gwilliams@cpan.org> |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |