File Coverage

blib/lib/Graph/Undirected/Components.pm
Criterion Covered Total %
statement 130 156 83.3
branch 23 36 63.8
condition 1 3 33.3
subroutine 19 22 86.3
pod 6 12 50.0
total 179 229 78.1


line stmt bran cond sub pod time code
1             package Graph::Undirected::Components;
2 1     1   35023 use strict;
  1         3  
  1         43  
3 1     1   5 use warnings;
  1         2  
  1         40  
4              
5             #use Data::Dump qw(dump);
6              
7             BEGIN
8             {
9 1     1   5 use Exporter ();
  1         20  
  1         23  
10 1     1   5 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  1         1  
  1         1281  
11 1     1   3 $VERSION = '0.31';
12 1         18 @ISA = qw(Exporter);
13 1         3 @EXPORT = qw();
14 1         2 @EXPORT_OK = qw();
15 1         127 %EXPORT_TAGS = ();
16             }
17              
18             # A list is used to hold info about each vertex. The constants
19             # are the indices into the list.
20              
21             # VI_PARENT holds the parent vertex of the vertex.
22 1     1   7 use constant VI_PARENT => 0;
  1         1  
  1         94  
23              
24             # VI_TIME holds the last access time of the vertex. The access time of a
25             # root vertex is always greater than all the vertices pointing to it,
26             # except the root vertex itself.
27 1     1   5 use constant VI_TIME => 1;
  1         2  
  1         67  
28              
29             # For each root vertex VI_SIZE holds the number of vertices pointing to
30             # it, include itself.
31 1     1   4 use constant VI_SIZE => 2;
  1         2  
  1         57  
32              
33             # For each root VI_MIN holds the lexographical min of all the vertices pointing
34             # to it, including itself.
35 1     1   5 use constant VI_MIN => 3;
  1         1  
  1         1223  
36              
37             #01234567890123456789012345678901234567891234
38             #Computes components of an undirected graph.
39              
40             =head1 NAME
41              
42             C - Computes components of an undirected graph.
43              
44             =head1 SYNOPSIS
45              
46             use Data::Dump qw(dump);
47             use Graph::Undirected::Components;
48             my $componenter = Graph::Undirected::Components->new();
49             my $vertices = 10;
50             for (my $i = 0; $i < $vertices; $i++)
51             {
52             $componenter->add_edge (int rand $vertices, int rand $vertices);
53             }
54             dump $componenter->connected_components ();
55              
56             =head1 DESCRIPTION
57              
58             C computes the components of an undirected
59             graph using a disjoint set data structure, so the memory used is bounded
60             by the number of vertices only.
61              
62             =head1 CONSTRUCTOR
63              
64             =head2 C
65              
66             The method C creates an instance of the C
67             class; it takes no parameters.
68              
69             =cut
70              
71             sub new
72             {
73              
74             # get the object type and create it.
75 47     47 1 132550 my ($Class, %Parameters) = @_;
76 47   33     486 my $Self = bless({}, ref($Class) || $Class);
77              
78             # return the object.
79 47         184 return $Self->clear();
80             }
81              
82             sub clear
83             {
84              
85             # get the object type and create it.
86 456     456 0 603 my $Self = $_[0];
87              
88             # set the hash to hold the vertex info (root, size, time).
89 456         833 $Self->{vertices} = {};
90              
91             # keep track of the total vertices and the approximate number of bytes they use.
92 456         622 $Self->{totalVertices} = 0;
93 456         732 $Self->{totalSize} = 0;
94              
95             # used to log the last time a vertex was accessed.
96 456         569 $Self->{counter} = 0;
97              
98             # return the object.
99 456         702 return $Self;
100             }
101              
102             =head1 METHODS
103              
104             =head2 C
105              
106             The method C updates the components of the graph using the edge
107             C<(vertexA, vertexB)>.
108              
109             =over
110              
111             =item vertexA, vertexB
112              
113             The vertices of the edge C<(vertexA, vertexB)> are Perl strings. If only C
114             is defined, then the edge C<(vertexA, vertexA)> is added to the graph. The method always returns
115             undef.
116              
117             =back
118              
119             =cut
120              
121             sub add_edge
122             {
123              
124             # if no edge, return undef now.
125 17264 50   17264 1 88963 return undef if @_ < 2;
126              
127             # get the object.
128 17264         19064 my $Self = $_[0];
129              
130             # force a loop edge if one node.
131 17264 50       42738 $_[2] = $_[1] if @_ < 3;
132              
133             # update the access time of the first node.
134             {
135 17264         15920 my $vertexInfoX = $Self->get_vertex_info($_[1], 1);
  17264         34738  
136 17264         32125 $vertexInfoX->[VI_TIME] = $Self->{counter}++;
137             }
138              
139             # update the access time of the second node if different.
140 17264 100       46803 if ($_[1] ne $_[2])
141             {
142 17258         32739 my $vertexInfoY = $Self->get_vertex_info($_[2], 1);
143 17258         32837 $vertexInfoY->[VI_TIME] = $Self->{counter}++;
144             }
145              
146             # get the info about the roots of the two vertices.
147 17264         31255 my $rootOfVertexInfoX = $Self->get_root_vertex_info($_[1]);
148 17264         32141 my $rootOfVertexInfoY = $Self->get_root_vertex_info($_[2]);
149              
150             # if the vertices have the same root, return now.
151 17264 100       37895 if ($rootOfVertexInfoX == $rootOfVertexInfoY)
152             {
153              
154             # update the access time of the root.
155 28         61 $rootOfVertexInfoX->[VI_TIME] = $Self->{counter}++;
156              
157 28         66 return undef;
158             }
159              
160 17236         20713 my ($newRoot, $otherRoot);
161 17236 100       42076 if ($rootOfVertexInfoX->[VI_SIZE] > $rootOfVertexInfoY->[VI_SIZE])
162             {
163              
164             # at this point, the vertices with root X is larger, so point Y to X.
165 5280         7250 $rootOfVertexInfoY->[VI_PARENT] = $rootOfVertexInfoX->[VI_PARENT];
166              
167             # update the access time of the root.
168 5280         12517 $rootOfVertexInfoX->[VI_TIME] = $Self->{counter}++;
169              
170             # update the size of the X root.
171 5280         6336 $rootOfVertexInfoX->[VI_SIZE] += $rootOfVertexInfoY->[VI_SIZE];
172              
173             # set the min vertex for $rootOfVertexInfoX
174 5280 100       12712 $rootOfVertexInfoX->[VI_MIN] = $rootOfVertexInfoY->[VI_MIN] if $rootOfVertexInfoY->[VI_MIN] lt $rootOfVertexInfoX->[VI_MIN];
175              
176             # Y is no longer a root, so truncate the array of vertex info.
177 5280         14020 $#$rootOfVertexInfoY = 1;
178             }
179             else
180             {
181              
182             # at this point, the vertices with root Y is larger (or equal), so point X to Y.
183 11956         15715 $rootOfVertexInfoX->[VI_PARENT] = $rootOfVertexInfoY->[VI_PARENT];
184              
185             # update the access time of the root.
186 11956         18375 $rootOfVertexInfoY->[VI_TIME] = $Self->{counter}++;
187              
188             # update the size of the Y root.
189 11956         14880 $rootOfVertexInfoY->[VI_SIZE] += $rootOfVertexInfoX->[VI_SIZE];
190              
191             # set the min vertex for $rootOfVertexInfoY
192 11956 100       27747 $rootOfVertexInfoY->[VI_MIN] = $rootOfVertexInfoX->[VI_MIN] if $rootOfVertexInfoX->[VI_MIN] lt $rootOfVertexInfoY->[VI_MIN];
193              
194             # X is no longer a root, so truncate the array of vertex info.
195 11956         31606 $#$rootOfVertexInfoX = 1;
196             }
197              
198 17236         41685 return undef;
199             }
200              
201             =head2 C
202              
203             The method C returns the aggregate byte length of all the vertices currently in
204             the graph.
205              
206             =cut
207              
208             sub getSizeBytes
209             {
210 2012     2012 1 10856 return $_[0]->{totalSize};
211             }
212              
213              
214             =head2 C
215              
216             The method C returns the total number of vertices currently in
217             the graph.
218              
219             =cut
220              
221             sub getSizeVertices
222             {
223 0     0 1 0 return $_[0]->{totalVertices};
224             }
225              
226             # returns the vertex info for the root vertex of the vertex.
227             sub get_root_vertex_info # ($Vertex)
228             {
229              
230             # get the object.
231 53104     53104 0 53727 my $Self = $_[0];
232              
233             # get the info for the vertex.
234 53104         95324 my $vertexInfo = $Self->get_vertex_info($_[1], 0);
235              
236             # if the parent of $Vertex is $Vertex, then $Vertex is the root.
237 53104 100       156795 return $vertexInfo if ($vertexInfo->[VI_PARENT] eq $_[1]);
238              
239             # make the stack.
240 29216         43844 my @stack;
241              
242             # put the vertex info on the stack.
243 29216         42263 push @stack, $vertexInfo;
244              
245             # will hold the root of the node.
246 29216         26454 my $rootOfVertex;
247              
248 29216         51405 while (!defined($rootOfVertex))
249             {
250              
251             # get the vertex info.
252 46862         50816 my $vertexInfo = $stack[-1];
253              
254             # get the parent vertex info.
255 46862         88802 my $vertexInfoOfParent = $Self->get_vertex_info($stack[-1]->[VI_PARENT], 0);
256              
257             # if we found the root, store it and exit the loop.
258 46862 100       123299 if ($vertexInfoOfParent->[VI_PARENT] eq $stack[-1]->[VI_PARENT])
259             {
260 29216         28368 $rootOfVertex = $vertexInfoOfParent;
261 29216         38322 last;
262             }
263              
264             # push the parent vertex onto the stack.
265 17646         36228 push @stack, $vertexInfoOfParent;
266             }
267              
268             # set the parent of each vertex on the stack to the root.
269 29216         66370 for (my $i = 0 ; $i < @stack ; $i++)
270             {
271 46862         47867 my $vertexInfo = $stack[$i];
272 46862         62925 $vertexInfo->[VI_PARENT] = $rootOfVertex->[VI_PARENT];
273 46862         45764 delete $vertexInfo->[VI_MIN];
274 46862         102391 delete $vertexInfo->[VI_SIZE];
275             }
276              
277             # return the root of the vertex.
278 29216         57949 return $rootOfVertex;
279             }
280              
281             # returns the info about the vertex as an array reference
282             # [root, time, size, min-vertex]
283             # 0 - root is the root of the vertex
284             # 1 - time is a counter of when the vertex was last referenced
285             # size is the number of vertices in the component, it is only define if the vertex is a root
286             # min-vertex holds the lexographically minimum vertex in the component
287             sub get_vertex_info # ($Vertex)
288             {
289              
290             # get the object.
291 134488     134488 0 177098 my ($Self, $Vertex, $Create) = @_;
292              
293             # get the hash that holds the info on each vertex.
294 134488         171040 my $vertices = $Self->{vertices};
295              
296             # if the vertex exists return it.
297 134488 100       384409 return $vertices->{$Vertex} if exists $vertices->{$Vertex};
298              
299             # if not allowed to create the vertex then die.
300             # confess __LINE__ . ": vertex $Vertex does not exist.\n" unless $Create;
301              
302             # the vertex does not exist, so make it and return $Vertex as root.
303 18576         56265 $vertices->{$Vertex} = [];
304 18576         54597 $vertices->{$Vertex}[VI_PARENT] = $Vertex;
305 18576         34927 $vertices->{$Vertex}[VI_TIME] = $Self->{counter}++;
306 18576         28962 $vertices->{$Vertex}[VI_SIZE] = 1;
307 18576         30540 $vertices->{$Vertex}[VI_MIN] = $Vertex;
308 18576         22062 ++$Self->{totalVertices};
309             {
310 1     1   1795 use bytes;
  1         12  
  1         6  
  18576         19050  
311 18576         43803 $Self->{totalSize} += bytes::length($Vertex);
312             }
313 18576         74899 return $vertices->{$Vertex};
314             }
315              
316             =head2 C
317              
318             The method C returns the components of the graph.
319             In list context C returns the vertices of the connected components
320             as a list of array references; in scalar context the list is returned as an array reference.
321             No specific ordering is applied to
322             the list of components or the vertices inside the lists.
323              
324             =cut
325              
326             sub connected_components
327             {
328              
329             # get the object.
330 40     40 1 222 my $Self = $_[0];
331              
332             # get the hash of just the nodes.
333 40         120 my $components = $Self->connected_components_as_hash();
334              
335             # return the components as an array or array reference.
336 40 50       504 return (values %$components) if (wantarray);
337 0         0 return [ values %$components ];
338             }
339              
340             =head2 C
341              
342             The method C returns an array reference of pairs of
343             the form C<[vertex,component-id]>. The parameter C
344             sets the percentage of most recently used vertices that are
345             retained in the graph. This method is used by
346             L to potentially speedup the
347             computation of the components.
348              
349             =cut
350              
351             sub get_vertexCompIdPairs
352             {
353              
354             # get the object.
355 409     409 1 505 my ($Self, $PercentageToKeep) = @_;
356              
357             # get the percentage of nodes to keep.
358 409 50       823 $PercentageToKeep = 0 unless defined $PercentageToKeep;
359 409         617 $PercentageToKeep = abs $PercentageToKeep;
360 409 50       834 $PercentageToKeep = 1 if $PercentageToKeep > 1;
361              
362             # compute the number of vertices to purge.
363 409         592 my $verticesToKeep = $Self->{totalVertices};
364 409         605 $verticesToKeep = int($verticesToKeep * $PercentageToKeep);
365              
366             # get the hash of vertices.
367 409         676 my $vertices = $Self->{vertices};
368              
369             # to create list of purged list need the vertices sorted by last
370             # access time, so create a list of pairs [vertex, time, compId].
371 409         1600 my @vertexCompId;
372 409         1614 while (my ($vertex, $vertexInfo) = each %$vertices)
373             {
374              
375             # for each vertex create the pair [vertex, time].
376 3287         6501 push @vertexCompId, [ $vertex, $Self->get_root_vertex_info($vertex)->[VI_MIN], $vertexInfo->[VI_TIME] ];
377             }
378              
379             # reset all the data structures for the object.
380 409         886 $Self->clear();
381              
382             # sort @vertexCompId by time descendingly.
383 409         1105 @vertexCompId = sort { $b->[2] <=> $a->[2] } @vertexCompId;
  6537         8752  
384              
385             # truncate the time from each vertexCompId array.
386 409         777 foreach my $vertexCompId (@vertexCompId)
387             {
388 3287         6698 $#$vertexCompId -= 1;
389             }
390              
391             # add the first $verticesToKeep to the object.
392 409         1150 for (my $i = 0 ; $i < $verticesToKeep ; $i++)
393             {
394 0         0 $Self->add_edge($vertexCompId[$i]->[0], $vertexCompId[$i]->[1]);
395             }
396              
397             # return the components.
398 409         4460 return \@vertexCompId;
399             }
400              
401             # returns the components in a hash with the key of each component
402             # being the lexographical min of the vertices.
403             sub connected_components_as_hash
404             {
405              
406             # get the object.
407 40     40 0 66 my $Self = $_[0];
408              
409             # get the hash of vertex info.
410 40         73 my $vertices = $Self->{vertices};
411              
412             # get the hash to hold the components.
413 40         63 my %components;
414              
415 40         247 while (my ($vertex, undef) = each %$vertices)
416             {
417              
418             # for each vertex get its root vertex.
419 15289         26726 my $rootOfVertexInfo = $Self->get_root_vertex_info($vertex);
420              
421             # get the min vertex of the root.
422 15289         21040 my $minVertexOfComponent = $rootOfVertexInfo->[VI_MIN];
423              
424             # store the node in a list keyed on the $minVertexOfComponent.
425 15289 100       29338 $components{$minVertexOfComponent} = [] unless exists $components{$minVertexOfComponent};
426 15289         14615 push @{ $components{$minVertexOfComponent} }, $vertex;
  15289         65816  
427             }
428              
429             # return the components.
430 40         162 return \%components;
431             }
432              
433             # used for testing.
434             sub test_isRootMinForEachComponent
435             {
436              
437             # get the object.
438 0     0 0   my $Self = $_[0];
439              
440             # get the hash of vertex info.
441 0           my $vertices = $Self->{vertices};
442              
443             # get the hash to hold the components.
444 0           my %components;
445              
446 0           while (my ($vertex, undef) = each %$vertices)
447             {
448              
449             # for each vertex get its root vertex.
450 0           my $rootOfVertexInfo = $Self->get_root_vertex_info($vertex);
451              
452             # return false if the min vertex is greater.
453 0 0         return 0 if $rootOfVertexInfo->[VI_MIN] gt $vertex;
454             }
455              
456             # return the true.
457 0           return 1;
458             }
459              
460             # used for testing.
461             sub test_areRootsCorrect
462             {
463              
464             # get the object.
465 0     0 0   my $Self = $_[0];
466              
467             # get the hash of the components.
468 0           my $hashForComponents = $Self->connected_components_as_hash();
469              
470             # get the hash to hold the components.
471 0           my %components;
472              
473 0           while (my ($minVertex, $listOfComponents) = each %$hashForComponents)
474             {
475              
476             # there should be no empty component lists.
477 0 0         unless ($listOfComponents)
478             {
479 0           die "empty component list.\n";
480             }
481              
482             # get the root vertex (not necessarily the min).
483 0           my $rootOfVertexInfo = $Self->get_root_vertex_info($listOfComponents->[0]);
484              
485             # the time of the root should not be less than all the nodes.
486 0           foreach my $vertex (@$listOfComponents)
487             {
488 0           my $vertexInfo = $Self->{vertices}{$vertex};
489 0 0         if ($rootOfVertexInfo->[VI_TIME] < $vertexInfo->[VI_TIME])
490             {
491 0           die "access time of root is $rootOfVertexInfo->[VI_TIME] < $vertexInfo->[VI_TIME] the vertex.\n";
492             }
493             }
494              
495             # compare the size of the component list to the size in the root vertex info.
496 0 0         if ($rootOfVertexInfo->[VI_SIZE] != scalar(@$listOfComponents))
497             {
498 0           my $realSize = scalar(@$listOfComponents);
499 0           warn "component sizes $rootOfVertexInfo->[VI_SIZE] != $realSize do not match.\n";
500 0           return 0;
501             }
502              
503             }
504              
505             # return the true.
506 0           return 1;
507             }
508              
509             =head1 INSTALLATION
510              
511             Use L to install the module and all its prerequisites:
512              
513             perl -MCPAN -e shell
514             cpan[1]> install Graph::Undirected::Components
515              
516             =head1 BUGS
517              
518             Please email bugs reports or feature requests to C, or through
519             the web interface at L. The author
520             will be notified and you can be automatically notified of progress on the bug fix or feature request.
521              
522             =head1 AUTHOR
523              
524             Jeff Kubina
525              
526             =head1 COPYRIGHT
527              
528             Copyright (c) 2009 Jeff Kubina. All rights reserved.
529             This program is free software; you can redistribute
530             it and/or modify it under the same terms as Perl itself.
531              
532             The full text of the license can be found in the
533             LICENSE file included with this module.
534              
535             =head1 KEYWORDS
536              
537             connected components, network, undirected graph
538              
539             =head1 SEE ALSO
540              
541             L
542              
543             =begin html
544              
545             connected component,
546             disjoint set data structure,
547             graph,
548             network,
549              
550             =end html
551              
552             =cut
553              
554             1;
555              
556             # The preceding line will help the module return a true value