File Coverage

blib/lib/Graph/ChuLiuEdmonds.pm
Criterion Covered Total %
statement 9 125 7.2
branch 0 62 0.0
condition 0 18 0.0
subroutine 3 6 50.0
pod 0 2 0.0
total 12 213 5.6


line stmt bran cond sub pod time code
1             package Graph::ChuLiuEdmonds;
2              
3 1     1   25396 use warnings;
  1         4  
  1         34  
4 1     1   7 use strict;
  1         2  
  1         39  
5              
6             =head1 NAME
7              
8             Graph::ChuLiuEdmonds - Find minimum spanning trees in a directed graph.
9              
10             =head1 VERSION
11              
12             Version 0.05
13              
14             =cut
15              
16 1     1   6 use Carp;
  1         6  
  1         1902  
17             our $VERSION = '0.06';
18             our $DEBUG=0;
19              
20             =head1 SYNOPSIS
21              
22             This module implements Chu-Liu-Edmonds L<[1]>,L<[2]> algorithm for finding a minimum
23             spanning tree (MST) in a directed graph.
24              
25             use Graph;
26             use Graph::Directed;
27             use Graph::ChuLiuEdmonds;
28              
29             my $graph = Graph::Directed->new(vertices=>[qw(a b c d)]);
30             $graph->add_weighted_edges(qw(a b 3 c d 7 d a 2 d b 1 c a 2));
31             my $msts = $graph->MST_ChuLiuEdmonds($graph);
32             ...
33              
34             =head1 EXPORT
35              
36             None.
37              
38             =head1 FUNCTIONS
39              
40             =head2 MST_ChuLiuEdmonds
41              
42             my $msts = $graph->MST_ChuLiuEdmonds();
43              
44             Returns a Graph object that is a forest consisting of MSTs for a given
45             directed graph.
46              
47             Minimum Spanning Trees or MSTs are directed tree subgraphs derived
48             from a directed graph that "span the graph" (covering all the
49             vertices) using as lightly weighted (hence the "minimum") edges as
50             possible.
51              
52             =cut
53              
54             sub Graph::MST_ChuLiuEdmonds_no_copy {
55 0     0 0   my ($graph)=@_;
56 0 0         carp("graph not directed") unless $graph->is_directed;
57 0           return _MST($graph);
58             }
59              
60             =head2 MST_ChuLiuEdmonds_no_copy
61              
62             my $msts = $graph->MST_ChuLiuEdmonds();
63              
64             Like the method above, only avoiding deep-copying the graph; the
65             method prunes $graph so as only the MSTs remain of it.
66              
67             =cut
68              
69             sub Graph::MST_ChuLiuEdmonds {
70 0     0 0   my ($graph)=@_;
71 0 0         carp("graph not directed") unless $graph->is_directed;
72 0           return _MST($graph->deep_copy);
73             }
74              
75             sub _MST {
76 0     0     my ($g)=@_;
77              
78 0           my %in; # in the resulting (or partial) MST, this will map a vertex Y to the vertex X
79             # in which the unique edge incoming to Y starts
80             # i.e maps Y => X if X->Y is an edge of the resulting MST
81              
82             # phase 1: add best edges and contract cycles
83 0           my $cycle_no=0;
84 0           my @V = $g->vertices;
85 0 0         print "Vertices: @V\n" if $DEBUG;
86 0           my $_no_vertices=@V;
87 0           my @C;
88 0           my ($x,$y,$w,$e);
89 0           while (@V) {
90 0 0         print "Graph: $g\n" if $DEBUG;
91 0           $y = shift @V;
92 0           my $best_w;
93 0 0         print STDERR "selecting incoming edges for vertex $y\n" if $DEBUG;
94 0           for my $e ($g->edges_to($y)) {
95 0           $w = $g->get_edge_weight( $e->[0], $y );
96 0 0 0       if (!defined($best_w) or $w<$best_w) {
97 0           $best_w=$w;
98 0           $x=$e->[0];
99             }
100             }
101 0 0         next unless defined $best_w;
102 0 0         print STDERR "best $x-$y: $best_w\n" if $DEBUG;
103             # we add the best incoming edge edge to $y
104 0           $in{$y}=$x;
105             # now we check it does not add a cycle to the MST:
106 0           my @cycle_nodes=($y);
107 0           my $i=0;
108 0   0       do {
109 0           unshift @cycle_nodes, $x;
110 0           $x=$in{$x};
111 0 0         die "BUG: looking for a cycle caused an infinite loop" if $i++ > $_no_vertices; # just for sure: should never happen.
112             } while (defined($x) and $x ne $y);
113 0 0         if (defined $x) {
114             # the new edge made a cycle:
115             # contract
116 0           my $cycle = 'CYCLE:'.($cycle_no++);
117 0 0         print STDERR "$cycle: @cycle_nodes\n" if $DEBUG;
118 0 0         my @cycle_weights = map {
119 0           print STDERR " $_: $cycle_nodes[$_-1],$cycle_nodes[$_]\n" if $DEBUG;
120 0           $g->get_edge_weight($cycle_nodes[$_-1],$cycle_nodes[$_]) } 0..$#cycle_nodes;
121 0 0         print STDERR "cycle weights: @cycle_weights\n" if $DEBUG;
122 0           push @V,$cycle;
123              
124 0           $g->add_vertex($cycle); # will represent the contracted @cycle_nodes
125              
126 0           my %in_cycle; @in_cycle{@cycle_nodes}=();
  0            
127              
128             # for each vertex in which ends an edge starting on the cycle,
129             # find the lightest edge to be preserved
130 0           my %from=(); my %fromW=();
  0            
131 0           for $x (@cycle_nodes) {
132 0           for my $e ($g->edges_from($x)) {
133 0           $y=$e->[1];
134 0 0         next if exists $in_cycle{$y};
135 0 0 0       if (exists $in{$y} and exists $in_cycle{$in{$y}}) {
136 0           $in{$y}=$cycle;
137             }
138 0           $w=$g->get_edge_weight($x,$y);
139 0 0 0       if (!exists($fromW{$y}) or $w < $fromW{$y}) {
140 0           $from{$y}=$x;
141 0           $fromW{$y}=$w;
142             }
143             }
144             }
145 0           for $y (keys %from) {
146 0 0         print STDERR "adding edge $cycle -> $y weight $fromW{$y}\n" if $DEBUG;
147 0           $g->add_weighted_edge($cycle, $y, $fromW{$y});
148             }
149              
150             # Similarly for edges that end on the cycle.
151             # For each such edge X->Y with Y on the cycle
152             # we compute a weight as w(X->Y)+the weight of the arc
153             # of the cycle starting at Y and ending on a node preceding Y
154             # in the cycle. For a fixed X we find Y on the cycle
155             # for which this computed weight is minimal.
156 0           my %to;
157 0           my %toW=(); my $i=0;
  0            
158 0           my $C=0; $C+=$_ for @cycle_weights; # weight of the whole cycle
  0            
159 0           for $y (@cycle_nodes) {
160 0           for $e ($g->edges_to($y)) {
161 0           $x=$e->[0];
162 0 0         next if exists $in_cycle{$x};
163 0           $w=$g->get_edge_weight($x,$y)+$C-$cycle_weights[$i];
164 0 0 0       if (!exists($toW{$x}) or $w < $toW{$x}) {
165 0           $to{$x}=$y;
166 0           $toW{$x}=$w;
167             }
168             }
169 0           $i++;
170             }
171 0           for my $x (keys %to) {
172 0 0         print STDERR "adding edge $x -> $cycle weight $toW{$x}\n" if $DEBUG;
173 0           $g->add_weighted_edge($x, $cycle, $toW{$x});
174             }
175              
176             # delete the nodes of the @cycle_nodes
177 0           $g->delete_vertices(@cycle_nodes);
178 0           delete @in{@cycle_nodes};
179 0           push @C,[$cycle,\@cycle_nodes,\@cycle_weights,\%to,\%from,\%toW,\%fromW];
180             }
181             }
182             # ok, now we have processed all nodes, including the nodes
183             # representing the contracted cycles.
184             # there is at most one incoming edge to
185             # each node (and exactly one if there was
186             # at least one in the original graph).
187              
188             # prune all edges that are not in the resulting (contracted) MST
189 0 0         print STDERR "before phase2: $g\n" if $DEBUG;
190 0           for $y ($g->vertices) {
191 0           $x=$in{$y};
192 0 0         $g->delete_edges(map { @$_[0,1] } grep { !defined($x) or ($_->[0] ne $x) } $g->edges_to($y));
  0            
  0            
193             }
194             # phase 2: expand all cycles
195 0 0         print STDERR "phase2: $g\n" if $DEBUG;
196 0           while (@C) {
197 0           my $C = pop @C;
198 0           my ($cycle,$cycle_nodes,$cycle_weights,$to,$from,$toW,$fromW)=@$C;
199              
200 0 0         print STDERR "expanding: $cycle\n" if $DEBUG;
201 0           $g->add_vertices(@$cycle_nodes);
202              
203             # fix incoming edge
204 0           ($e) = $g->edges_to($cycle); # should now be at most one
205 0 0         if ($e) {
206 0           $x=$e->[0];
207             #print STDERR "incoming edge from: $x\n" if $DEBUG;
208 0           $y = $to->{$x};
209 0           $g->add_weighted_edge($x,$y,$toW->{$x});
210 0           for my $i (0..$#$cycle_nodes) {
211 0 0         $g->add_weighted_edge($cycle_nodes->[$i-1],$cycle_nodes->[$i],$cycle_weights->[$i]) unless $cycle_nodes->[$i] eq $y;
212             }
213             } else {
214             # the whole graph starts at this cycle
215             # find the edge with the lowest weight and disconnect there
216             #print STDERR "the cycle is a root\n" if $DEBUG;
217 0           my $max;
218             my $max_i; # the worst edge on the cycle
219 0           my $i = 0;
220 0           for my $w (@$cycle_weights) {
221 0 0 0       if (!defined($max) or $w>$max) {
222 0           $max = $w;
223 0           $max_i=$i;
224             }
225 0           $i++
226             }
227 0           for $i (0..$#$cycle_nodes) {
228             #print "adding edge $cycle_nodes->[$i-1],$cycle_nodes->[$i] $cycle_weights->[$i] unless $i==$max_i\n" if $DEBUG;
229 0 0         $g->add_weighted_edge($cycle_nodes->[$i-1],$cycle_nodes->[$i],$cycle_weights->[$i]) unless $i==$max_i;
230             }
231             }
232             # fix outgoing edge
233 0           for $e ($g->edges_from($cycle)) {
234 0           $y = $e->[1];
235 0           $x = $from->{$y};
236 0 0         print STDERR "restoring edge $x -> $e->[1]\n" if $DEBUG;
237 0           $g->add_weighted_edge($x,$y,$fromW->{$y});
238             }
239 0           $g->delete_vertex($cycle);
240 0 0         print STDERR "expanded: $g\n" if $DEBUG;
241             }
242             # all cycles expanded, we are done!
243 0 0         print STDERR "MST: $g\n" if $DEBUG;
244 0           return $g;
245             }
246              
247              
248             =head1 AUTHOR
249              
250             Petr Pajas, C<< >>
251              
252             =head1 CAVEATS
253              
254             =over 5
255              
256             =item o
257              
258             The implementation was not tested on complex examples.
259              
260             =item o
261              
262             Vertices cannot be perl objects (or references).
263              
264             =item o
265              
266             Vertex and edge attributes are not copied from the source graph to the
267             resulting graph (except for edge weights).
268              
269             =item o
270              
271             The author did not attempt to compute the actual algorithmic
272             complexity of this particular implementation.
273              
274             =item o
275              
276             The algorithm implemented in this module returns the optimal MSTs. To
277             obtain k-best MSTs, one could implement Camerini's algorithm L<[4]>
278             (also described in [5]).
279              
280             =back
281              
282             =head1 BUGS
283              
284             Please report any bugs or feature requests to
285             C, or through the web interface at
286             L.
287             I will be notified, and then you'll automatically be notified of progress on
288             your bug as I make changes.
289              
290             =head1 SUPPORT
291              
292             You can find documentation for this module with the perldoc command.
293              
294             perldoc Graph::ChuLiuEdmonds
295              
296             You can also look for information at:
297              
298             =over 4
299              
300             =item * AnnoCPAN: Annotated CPAN documentation
301              
302             L
303              
304             =item * CPAN Ratings
305              
306             L
307              
308             =item * RT: CPAN's request tracker
309              
310             L
311              
312             =item * Search CPAN
313              
314             L
315              
316             =back
317              
318             =head1 SEE ALSO
319              
320             The implementation follows the algorithm published by Edmonds L<[1]>
321             and independently Chu and Liu L<[2]>, as scatched in the 3rd section
322             of L<[3]>. Note that possibly more efficient implementation is
323             suggested in L<[3]>.
324              
325             =over 4
326              
327             =item [1]
328              
329             J. Edmonds. 1967. Optimum branchings. Journal of Research of the
330             National Bureau of Standards, 71B:233-240.
331              
332             =item [2]
333              
334             Y.J. Chu and T.H. Liu. 1965. On the shortest arborescence of a
335             directed graph. Science Sinica, 14:1396-1400.
336              
337             =item [3]
338              
339             H. N. Gabow, Z. Galil, T. Spencer and R. E. Tarjan. 1986
340             Efficient algorithms for finding minimum spanning trees in undirected
341             and directed graphs. Combinatorica 6 (2) 109-122
342              
343             =item [4]
344              
345             Paolo M. Camerini, Luigi Fratta, and Francesco Maffioli. 1980.
346             The k best spanning arborescences of a network. Networks,
347             10:91-110.
348              
349             =item [5]
350              
351             Keith Hall. 2007. k-best spanning tree parsing. In (To Appear)
352             Proceedings of the 45th Annual Meeting of the Association for
353             Computational Linguistics.
354              
355             =back
356              
357             =head1 ACKNOWLEDGEMENTS
358              
359             The development of this module was supported by grant GA AV CR 1ET101120503.
360              
361             =head1 COPYRIGHT & LICENSE
362              
363             Copyright 2008 Petr Pajas, all rights reserved.
364              
365             This program is free software; you can redistribute it and/or modify it
366             under the same terms as Perl itself.
367              
368             =cut
369              
370             1; # End of Graph::ChuLiuEdmonds