| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Graph::Undirected::Components::External; |
|
2
|
1
|
|
|
1
|
|
12004
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
44
|
|
|
3
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
131
|
|
|
4
|
1
|
|
|
1
|
|
15
|
use File::Path qw(make_path remove_tree); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
110
|
|
|
5
|
1
|
|
|
1
|
|
13591
|
use File::Temp qw(tempdir tempfile); |
|
|
1
|
|
|
|
|
25852
|
|
|
|
1
|
|
|
|
|
68
|
|
|
6
|
1
|
|
|
1
|
|
1368
|
use Sort::External; |
|
|
1
|
|
|
|
|
6036
|
|
|
|
1
|
|
|
|
|
53
|
|
|
7
|
1
|
|
|
1
|
|
4855
|
use Text::CSV; |
|
|
1
|
|
|
|
|
26301
|
|
|
|
1
|
|
|
|
|
8
|
|
|
8
|
1
|
|
|
1
|
|
17697
|
use Log::Log4perl; |
|
|
1
|
|
|
|
|
112625
|
|
|
|
1
|
|
|
|
|
9
|
|
|
9
|
1
|
|
|
1
|
|
66
|
use Graph::Undirected::Components; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
57
|
|
|
10
|
1
|
|
|
1
|
|
6
|
use Time::HiRes; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
10
|
|
|
11
|
|
|
|
|
|
|
#use Data::Dump qw(dump); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
BEGIN |
|
14
|
|
|
|
|
|
|
{ |
|
15
|
1
|
|
|
1
|
|
154
|
use Exporter (); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
24
|
|
|
16
|
1
|
|
|
1
|
|
6
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
143
|
|
|
17
|
1
|
|
|
1
|
|
3
|
$VERSION = '0.31'; |
|
18
|
1
|
|
|
|
|
19
|
@ISA = qw(Exporter); |
|
19
|
1
|
|
|
|
|
2
|
@EXPORT = qw(); |
|
20
|
1
|
|
|
|
|
2
|
@EXPORT_OK = qw(); |
|
21
|
1
|
|
|
|
|
4110
|
%EXPORT_TAGS = (); |
|
22
|
|
|
|
|
|
|
} |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $el = "\n"; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#01234567890123456789012345678901234567891234 |
|
27
|
|
|
|
|
|
|
#Computes components of an undirected graph. |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 NAME |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
C - Computes components of an undirected graph. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
use Data::Dump qw(dump); |
|
36
|
|
|
|
|
|
|
use Log::Log4perl qw(:easy); |
|
37
|
|
|
|
|
|
|
use Graph::Undirected::Components::External; |
|
38
|
|
|
|
|
|
|
Log::Log4perl->easy_init ($WARN); |
|
39
|
|
|
|
|
|
|
my $componenter = Graph::Undirected::Components::External->new(outputFile => 'vertexCompId.txt', purgeSizeBytes => 5000); |
|
40
|
|
|
|
|
|
|
my $vertices = 10000; |
|
41
|
|
|
|
|
|
|
for (my $i = 0; $i < $vertices; $i++) |
|
42
|
|
|
|
|
|
|
{ |
|
43
|
|
|
|
|
|
|
$componenter->add_edge (int rand $vertices, int rand $vertices); |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
dump $componenter->finish (); |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
C computes the components of an undirected |
|
50
|
|
|
|
|
|
|
graph limited only by the amount of free disk space. All errors, warnings, and |
|
51
|
|
|
|
|
|
|
informational messages are logged using L. |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head2 C |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
The method C creates an instance of C |
|
58
|
|
|
|
|
|
|
with the following parameters. |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=over |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=item C |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
workingDirectory => File::Temp::tempdir() |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
C is an optional parameter specifying the path |
|
67
|
|
|
|
|
|
|
to a directory that all temporary files are written to; the default |
|
68
|
|
|
|
|
|
|
is set using L. |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=item C |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
purgeSizeBytes => 1000000 |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
C is an optional parameter specifying the aggregate byte size |
|
76
|
|
|
|
|
|
|
that all the vertices added to the internal instance of |
|
77
|
|
|
|
|
|
|
L must exceed before its content is purged |
|
78
|
|
|
|
|
|
|
to disk. The optimal value depends on the total internal memory |
|
79
|
|
|
|
|
|
|
available. |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=item C |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
purgeSizeVertices => undef |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
C is an optional parameter specifying the total |
|
86
|
|
|
|
|
|
|
vertices added to the internal instance of |
|
87
|
|
|
|
|
|
|
L that must be exceed before its content is purged |
|
88
|
|
|
|
|
|
|
to disk. If C and C are both defined, then a purge |
|
89
|
|
|
|
|
|
|
occurs when either threshold is exceeded. |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item C |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
retainPercentage => 0.10 |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
C is an optional parameter specifying the percentage of |
|
96
|
|
|
|
|
|
|
the most recently used vertices to be retained in the internal instance of |
|
97
|
|
|
|
|
|
|
L when it is purged. If the edges of the |
|
98
|
|
|
|
|
|
|
graph are not added in a random order, caching some of the vertices can |
|
99
|
|
|
|
|
|
|
speedup the computation of the components. |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item C |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
outputFile => ... |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
C is the path to the file that the C<(vertex,component-id)> pairs |
|
106
|
|
|
|
|
|
|
are written to separated by the C; the directory of the file should exist. An exception is |
|
107
|
|
|
|
|
|
|
thrown if C is undefined or the file cannot be written to. |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item C |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
delimiter => ',' |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
C is the delimiter used to separate the vertices of an edge when |
|
114
|
|
|
|
|
|
|
they are written to temporary files. All vertices should be encoded so that |
|
115
|
|
|
|
|
|
|
they do not contain the delimiter, that is, it should be true that |
|
116
|
|
|
|
|
|
|
C for all vertices. |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=back |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub new |
|
123
|
|
|
|
|
|
|
{ |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# get the object type and parameters. |
|
126
|
7
|
|
|
7
|
1
|
889578
|
my ($Class, %Parameters) = @_; |
|
127
|
7
|
|
33
|
|
|
69
|
my $Self = bless({}, ref($Class) || $Class); |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# set the flag when finished is called so no more edges are added. |
|
130
|
7
|
|
|
|
|
32
|
$Self->{finishedCalled} = 0; |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# set the start time. |
|
133
|
7
|
|
|
|
|
36
|
$Self->{startTime} = $Self->getCpuTime(); |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# flag if temporary files and directories should be deleted; |
|
136
|
|
|
|
|
|
|
# used for debugging. |
|
137
|
7
|
|
|
|
|
22
|
$Self->{cleanup} = 1; |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# set the default delimiter for the input and output files. |
|
140
|
7
|
|
|
|
|
14
|
$Self->{delimiter} = ','; |
|
141
|
7
|
100
|
|
|
|
33
|
$Self->{delimiter} = $Parameters{delimiter} if exists $Parameters{delimiter}; |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# set the recursion level |
|
144
|
7
|
|
|
|
|
18
|
$Self->{level} = 0; |
|
145
|
7
|
100
|
66
|
|
|
53
|
$Self->{level} = $Parameters{level} if (exists($Parameters{level}) && defined($Parameters{level})); |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# set the workingDirectory and create it if needed. |
|
148
|
7
|
100
|
|
|
|
28
|
if (exists($Parameters{baseDirectory})) |
|
149
|
|
|
|
|
|
|
{ |
|
150
|
6
|
|
|
|
|
14
|
$Self->{baseDirectory} = $Parameters{baseDirectory}; |
|
151
|
|
|
|
|
|
|
} |
|
152
|
|
|
|
|
|
|
else |
|
153
|
|
|
|
|
|
|
{ |
|
154
|
1
|
|
|
|
|
2
|
my $workingDirectory; |
|
155
|
1
|
50
|
33
|
|
|
6
|
if (exists($Parameters{workingDirectory}) && defined($Parameters{workingDirectory})) |
|
156
|
|
|
|
|
|
|
{ |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# if the directory does not exist created it. |
|
159
|
0
|
0
|
|
|
|
0
|
unless (-d $Parameters{workingDirectory}) |
|
160
|
|
|
|
|
|
|
{ |
|
161
|
0
|
|
|
|
|
0
|
make_path($Parameters{workingDirectory}, { verbose => 0, mode => 0700 }); |
|
162
|
0
|
|
|
|
|
0
|
$Self->{unlinkWorkingDirectory} = 1; |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
|
|
165
|
0
|
|
|
|
|
0
|
$workingDirectory = $Parameters{workingDirectory}; |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
else |
|
168
|
|
|
|
|
|
|
{ |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# none given as a parameters, so create a temporary one. |
|
171
|
1
|
50
|
|
|
|
9
|
$workingDirectory = tempdir(CLEANUP => $Self->{cleanup}) unless defined $workingDirectory; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# if the directory does not exist log an error and die. |
|
175
|
1
|
50
|
|
|
|
874
|
unless (-e $workingDirectory) |
|
176
|
|
|
|
|
|
|
{ |
|
177
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
178
|
0
|
|
|
|
|
0
|
$logger->logdie("error: could not create directory '$workingDirectory'.\n"); |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# if workingDirectory is not a directory log an error and die. |
|
182
|
1
|
50
|
|
|
|
23
|
unless (-d $workingDirectory) |
|
183
|
|
|
|
|
|
|
{ |
|
184
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
185
|
0
|
|
|
|
|
0
|
$logger->logdie("error: '$workingDirectory' is not a directory.\n"); |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# now create the base directory in the working directory. |
|
189
|
1
|
|
|
|
|
5
|
my $baseDirectory = tempdir(DIR => $workingDirectory, CLEANUP => $Self->{cleanup}); |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# if $baseDirectory is not a directory log an error and die. |
|
192
|
1
|
50
|
|
|
|
359
|
unless (-d $baseDirectory) |
|
193
|
|
|
|
|
|
|
{ |
|
194
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
195
|
0
|
|
|
|
|
0
|
$logger->logdie("error: $baseDirectory is not a directory.\n"); |
|
196
|
|
|
|
|
|
|
} |
|
197
|
1
|
|
|
|
|
4
|
$Self->{baseDirectory} = $baseDirectory; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# create the object to compute the components using internal memory. |
|
201
|
7
|
|
|
|
|
56
|
$Self->{componenter} = Graph::Undirected::Components->new(); |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# set the purge size of the componenter. |
|
204
|
7
|
|
|
|
|
17
|
my $purgeSizeBytes = 1000000; |
|
205
|
7
|
50
|
33
|
|
|
80
|
$purgeSizeBytes = int abs $Parameters{purgeSizeBytes} if exists($Parameters{purgeSizeBytes}) && defined($Parameters{purgeSizeBytes}); |
|
206
|
7
|
|
|
|
|
19
|
$Self->{purgeSizeBytes} = $purgeSizeBytes; |
|
207
|
7
|
50
|
66
|
|
|
49
|
$Self->{purgeSizeVertices} = int abs $Parameters{purgeSizeVertices} if exists($Parameters{purgeSizeVertices}) && defined($Parameters{purgeSizeVertices}); |
|
208
|
7
|
|
|
|
|
16
|
$Self->{totalEdgesAddedSinceLastPurge} = 0; |
|
209
|
7
|
|
|
|
|
13
|
$Self->{totalEdges} = 0; |
|
210
|
7
|
|
|
|
|
15
|
$Self->{totalVertices} = 0; |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# set the percentage of vertices to retain in the componenter when purging. |
|
213
|
7
|
|
|
|
|
12
|
my $retainPercentage = 0.10; |
|
214
|
7
|
100
|
66
|
|
|
45
|
$retainPercentage = abs $Parameters{retainPercentage} |
|
215
|
|
|
|
|
|
|
if (exists($Parameters{retainPercentage}) && defined($Parameters{retainPercentage})); |
|
216
|
7
|
50
|
|
|
|
24
|
$retainPercentage = 1 if $retainPercentage > 1; |
|
217
|
7
|
|
|
|
|
136
|
$Self->{retainPercentage} = $retainPercentage; |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# set the file that the "vertex,compId" pairs will be written to. |
|
220
|
7
|
50
|
33
|
|
|
46
|
unless (exists($Parameters{outputFile}) && defined($Parameters{outputFile})) |
|
221
|
|
|
|
|
|
|
{ |
|
222
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
223
|
0
|
|
|
|
|
0
|
$logger->logdie("error: parameter outputFile was not defined.\n"); |
|
224
|
|
|
|
|
|
|
} |
|
225
|
7
|
|
|
|
|
17
|
$Self->{outputFile} = $Parameters{outputFile}; |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# make sure we can write to the output file before devoting time to |
|
228
|
|
|
|
|
|
|
# computing the connected components. |
|
229
|
|
|
|
|
|
|
{ |
|
230
|
7
|
|
|
|
|
7
|
my $outputFileHandle; |
|
|
7
|
|
|
|
|
10
|
|
|
231
|
7
|
50
|
|
|
|
447
|
unless (open($outputFileHandle, '>', $Self->{outputFile})) |
|
232
|
|
|
|
|
|
|
{ |
|
233
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
234
|
0
|
|
|
|
|
0
|
$logger->logdie("error: could not open file '$Self->{outputFile}' for writing.\n"); |
|
235
|
|
|
|
|
|
|
} |
|
236
|
7
|
|
|
|
|
107
|
close $outputFileHandle; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
7
|
|
|
|
|
430
|
unlink $Self->{outputFile}; |
|
239
|
|
|
|
|
|
|
|
|
240
|
7
|
|
|
|
|
27
|
return $Self; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=head1 METHODS |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head2 C |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
The method C updates the components of the graph using the edge |
|
248
|
|
|
|
|
|
|
C<(vertexA, vertexB)>. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=over |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=item vertexA, vertexB |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
The vertices of the edge C<(vertexA, vertexB)> are Perl strings. If only C |
|
255
|
|
|
|
|
|
|
is defined, then the edge C<(vertexA, vertexA)> is added to the graph. The method always returns |
|
256
|
|
|
|
|
|
|
undef. |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=back |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=cut |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub add_edge |
|
263
|
|
|
|
|
|
|
{ |
|
264
|
2012
|
|
|
2012
|
1
|
6035
|
my ($Self, @Edge) = @_; |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# if nothing to add return now. |
|
267
|
2012
|
50
|
|
|
|
4254
|
return undef unless @Edge; |
|
268
|
|
|
|
|
|
|
|
|
269
|
2012
|
50
|
|
|
|
4227
|
if ($Self->{finishedCalled}) |
|
270
|
|
|
|
|
|
|
{ |
|
271
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
272
|
0
|
|
|
|
|
0
|
$logger->logdie("error: cannot add more edges after call to finish().\n"); |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
|
|
275
|
2012
|
|
|
|
|
6363
|
$Self->{componenter}->add_edge(@Edge); |
|
276
|
2012
|
|
|
|
|
3177
|
++$Self->{totalEdgesAddedSinceLastPurge}; |
|
277
|
2012
|
|
|
|
|
2370
|
++$Self->{totalEdges}; |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# if componenter is too large, purge it to disk. |
|
280
|
2012
|
100
|
33
|
|
|
5789
|
if |
|
|
|
|
66
|
|
|
|
|
|
281
|
|
|
|
|
|
|
( |
|
282
|
|
|
|
|
|
|
($Self->{componenter}->getSizeBytes() > $Self->{purgeSizeBytes}) || |
|
283
|
|
|
|
|
|
|
((exists $Self->{purgeSizeVertices}) && ($Self->{componenter}->getSizeVertices() > $Self->{purgeSizeVertices})) |
|
284
|
|
|
|
|
|
|
) |
|
285
|
|
|
|
|
|
|
{ |
|
286
|
|
|
|
|
|
|
# add the root node pairs to the external component finder. |
|
287
|
402
|
|
|
|
|
985
|
$Self->purge($Self->{retainPercentage}); |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
|
|
290
|
2012
|
|
|
|
|
10588
|
return undef; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head2 C |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
The method C adds all the edges in a file to the graph. |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=over |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=item fileOfEdges => ... |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
C specifies the path to the file containing the edges to |
|
302
|
|
|
|
|
|
|
add. An exception is thrown if there are problems openning or reading the file. |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=item delimiter |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
The edges are read from C using L; C |
|
307
|
|
|
|
|
|
|
must be to the delimiter used to separate the vertices of an edge in the file. The default |
|
308
|
|
|
|
|
|
|
is the value set with the L constructor. |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=back |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=cut |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub add_file |
|
315
|
|
|
|
|
|
|
{ |
|
316
|
0
|
|
|
0
|
1
|
0
|
my ($Self, %Parameters) = @_; |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# if no fileOfEdges, return now. |
|
319
|
0
|
0
|
0
|
|
|
0
|
if (!exists ($Parameters{fileOfEdges}) || !defined ($Parameters{fileOfEdges})) |
|
320
|
|
|
|
|
|
|
{ |
|
321
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
322
|
0
|
|
|
|
|
0
|
$logger->logdie("error: parameter 'fileOfEdges' was not defined.\n"); |
|
323
|
|
|
|
|
|
|
} |
|
324
|
0
|
|
|
|
|
0
|
my $fileOfEdges = $Parameters{fileOfEdges}; |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# set the default delimiter. |
|
327
|
0
|
|
|
|
|
0
|
my $delimiter = $Self->{delimiter}; |
|
328
|
0
|
0
|
|
|
|
0
|
$delimiter = $Parameters{delimiter} if exists $Parameters{delimiter}; |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# make sure the file exists. |
|
331
|
0
|
|
|
|
|
0
|
my $fileOfEdgesHandle; |
|
332
|
0
|
0
|
|
|
|
0
|
unless (open($fileOfEdgesHandle, '<:encoding(utf8)', $fileOfEdges)) |
|
333
|
|
|
|
|
|
|
{ |
|
334
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
335
|
0
|
|
|
|
|
0
|
$logger->logdie("error: could not open file '$fileOfEdges' for reading: $!\n"); |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# create the CSV parser. |
|
339
|
0
|
|
|
|
|
0
|
my $csvParser = Text::CSV->new({ binary => 1, sep_char => $delimiter }); |
|
340
|
0
|
0
|
|
|
|
0
|
unless ($csvParser) |
|
341
|
|
|
|
|
|
|
{ |
|
342
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
343
|
0
|
|
|
|
|
0
|
$logger->logdie("error: could not open CSV parser; " . Text::CSV->error_diag() . "\n"); |
|
344
|
|
|
|
|
|
|
} |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# add each edge in the file to the graph. |
|
347
|
0
|
|
|
|
|
0
|
while (my $edge = $csvParser->getline($fileOfEdgesHandle)) |
|
348
|
|
|
|
|
|
|
{ |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# if no edge, skip it. |
|
351
|
0
|
0
|
|
|
|
0
|
next unless defined $edge; |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# if the first column is empty, skip it. |
|
354
|
0
|
0
|
0
|
|
|
0
|
next unless exists($edge->[0]) && defined($edge->[0]); |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# if the second column is emtpy, set it to the first. |
|
357
|
0
|
0
|
0
|
|
|
0
|
$edge->[1] = $edge->[0] if (!exists($edge->[1]) || !defined($edge->[1])); |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# add the edge. |
|
360
|
0
|
|
|
|
|
0
|
$Self->add_edge($edge->[0], $edge->[1]); |
|
361
|
|
|
|
|
|
|
} |
|
362
|
0
|
|
|
|
|
0
|
close $fileOfEdgesHandle; |
|
363
|
|
|
|
|
|
|
|
|
364
|
0
|
|
|
|
|
0
|
return undef; |
|
365
|
|
|
|
|
|
|
} |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub purge |
|
368
|
|
|
|
|
|
|
{ |
|
369
|
408
|
|
|
408
|
0
|
637
|
my ($Self, $RetainPercentage) = @_; |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# set the default for the percentage of vertices retained in vertexCompIdSorter. |
|
372
|
408
|
50
|
|
|
|
785
|
$RetainPercentage = 0 unless defined $RetainPercentage; |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# create the vertexCompIdSorter if it does not exist. |
|
375
|
408
|
100
|
|
|
|
1042
|
unless (exists $Self->{vertexCompIdSorter}) |
|
376
|
|
|
|
|
|
|
{ |
|
377
|
6
|
|
|
|
|
53
|
$Self->{vertexCompIdSorter} = Sort::External->new(mem_threshold => 64 * 1024 * 1024, working_dir => $Self->{baseDirectory}); |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# get the list of vertexCompIds. |
|
381
|
408
|
100
|
|
|
|
4207
|
$RetainPercentage = 0 if $Self->{totalEdgesAddedSinceLastPurge} < 2; |
|
382
|
408
|
|
|
|
|
1161
|
my $listOfVertexCompIds = $Self->{componenter}->get_vertexCompIdPairs($RetainPercentage); |
|
383
|
408
|
|
|
|
|
636
|
my $totalVertexCompIds = scalar(@$listOfVertexCompIds); |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
# add each vertexCompId to the external sorter. |
|
386
|
408
|
|
|
|
|
944
|
for (my $i = 0 ; $i < $totalVertexCompIds ; $i++) |
|
387
|
|
|
|
|
|
|
{ |
|
388
|
3281
|
|
|
|
|
7666
|
my $vertexCompIdString = $listOfVertexCompIds->[$i][0] . $Self->{delimiter} . $listOfVertexCompIds->[$i][1]; |
|
389
|
3281
|
|
|
|
|
13669
|
$Self->{vertexCompIdSorter}->feed($vertexCompIdString); |
|
390
|
|
|
|
|
|
|
} |
|
391
|
408
|
|
|
|
|
505
|
$listOfVertexCompIds = undef; |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# keep track of the number of edges added between purges. |
|
394
|
408
|
|
|
|
|
2611
|
$Self->{totalEdgesAddedSinceLastPurge} = 0; |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# log the purge as an info message. |
|
397
|
|
|
|
|
|
|
{ |
|
398
|
408
|
|
|
|
|
623
|
my $logger = Log::Log4perl->get_logger(); |
|
|
408
|
|
|
|
|
1473
|
|
|
399
|
408
|
|
|
|
|
19054
|
$logger->info("purged $totalVertexCompIds vertex,component-id pairs.\n"); |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
408
|
|
|
|
|
3267
|
return undef; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head2 C |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
The method C completes the computation of the connected components |
|
408
|
|
|
|
|
|
|
and writes the pairs C<(vertex,component-id)> to the L. For |
|
409
|
|
|
|
|
|
|
each component C is the lexographical minimum of all the |
|
410
|
|
|
|
|
|
|
vertices in the component. |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
No edges can be added to the graph after C is called. |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=cut |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub finish |
|
417
|
|
|
|
|
|
|
{ |
|
418
|
7
|
|
|
7
|
1
|
2422
|
my ($Self) = @_; |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# once finish is called no more edges can be added. |
|
421
|
7
|
|
|
|
|
21
|
$Self->{finishedCalled} = 1; |
|
422
|
|
|
|
|
|
|
|
|
423
|
7
|
100
|
|
|
|
25
|
if (exists $Self->{vertexCompIdSorter}) |
|
424
|
|
|
|
|
|
|
{ |
|
425
|
|
|
|
|
|
|
# purge the last of the internal vertexCompIds and do not retain any vertices. |
|
426
|
6
|
|
|
|
|
19
|
$Self->purge(0); |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# finish sorting the vertexCompId pairs. |
|
429
|
6
|
|
|
|
|
35
|
$Self->{vertexCompIdSorter}->finish(); |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# get the sorter for the oldCompId-to-oldCompId file of pairs. |
|
432
|
6
|
|
|
|
|
3471
|
my $oldCompIdToOldSorter = Sort::External->new(mem_threshold => 64 * 1024 * 1024, working_dir => $Self->{baseDirectory}); |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
# get the temporay file for the oldCompId-to-vertex file of pairs. |
|
435
|
6
|
|
|
|
|
3486
|
my ($oldCompIdToVertexFileFh, $oldCompIdToVertexFile) = |
|
436
|
|
|
|
|
|
|
tempfile("OV_XXXXXXXXXX", DIR => $Self->{baseDirectory}, UNLINK => $Self->{cleanup}); |
|
437
|
6
|
|
|
|
|
2627
|
close $oldCompIdToVertexFileFh; |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# compute the subgraph based on the oldCompId-to-oldCompId pairs and |
|
440
|
|
|
|
|
|
|
# write the oldCompId-to-vertex pairs. |
|
441
|
6
|
|
|
|
|
33
|
$Self->writeSubgraphInfoToFiles($oldCompIdToOldSorter, $oldCompIdToVertexFile); |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# finish the sort. |
|
444
|
6
|
|
|
|
|
2542
|
$oldCompIdToOldSorter->finish(); |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# get the temporay file for the oldCompId-to-newCompId file of pairs. |
|
447
|
6
|
|
|
|
|
1814
|
my ($oldCompIdToNewCompIdFileFh, $oldCompIdToNewCompIdFile) = |
|
448
|
|
|
|
|
|
|
tempfile("ON_XXXXXXXXXX", DIR => $Self->{baseDirectory}, UNLINK => $Self->{cleanup}); |
|
449
|
6
|
|
|
|
|
2907
|
close $oldCompIdToNewCompIdFileFh; |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# compute the components of the subgraph from the oldCompId-to-oldCompId pairs. |
|
452
|
|
|
|
|
|
|
{ |
|
453
|
6
|
|
|
|
|
14
|
my $externalComponenter = |
|
|
6
|
|
|
|
|
103
|
|
|
454
|
|
|
|
|
|
|
Graph::Undirected::Components::External->new( |
|
455
|
|
|
|
|
|
|
baseDirectory => $Self->{baseDirectory}, |
|
456
|
|
|
|
|
|
|
outputFile => $oldCompIdToNewCompIdFile, |
|
457
|
|
|
|
|
|
|
level => 1 + $Self->{level}, |
|
458
|
|
|
|
|
|
|
delimiter => $Self->{delimiter}, |
|
459
|
|
|
|
|
|
|
purgeSizeBytes => $Self->{purgeSizeBytes}, |
|
460
|
|
|
|
|
|
|
purgeSizeVertices => $Self->{purgeSizeVertices}, |
|
461
|
|
|
|
|
|
|
retainPercentage => $Self->{retainPercentage} |
|
462
|
|
|
|
|
|
|
); |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# add the edges in sorted order. |
|
465
|
6
|
|
|
|
|
20
|
my $previousEdgeStr = ''; |
|
466
|
6
|
|
|
|
|
46
|
while (defined (my $edgeStr = $oldCompIdToOldSorter->fetch())) |
|
467
|
|
|
|
|
|
|
{ |
|
468
|
|
|
|
|
|
|
# skip the edge if a duplicate. |
|
469
|
1978
|
100
|
|
|
|
14361
|
next if $previousEdgeStr eq $edgeStr; |
|
470
|
1524
|
|
|
|
|
1723
|
$previousEdgeStr = $edgeStr; |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
# split the edge. |
|
473
|
1524
|
|
|
|
|
5562
|
my @edge = split (/$Self->{delimiter}/, $edgeStr); |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# add the edge to the graph. |
|
476
|
1524
|
|
|
|
|
3381
|
$externalComponenter->add_edge (@edge); |
|
477
|
|
|
|
|
|
|
} |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# purge the edge sorted. |
|
480
|
6
|
|
|
|
|
123
|
$oldCompIdToOldSorter = undef; |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
# finish computing the components of the graph. |
|
483
|
6
|
|
|
|
|
60
|
my $processingStats = $externalComponenter->finish(); |
|
484
|
6
|
50
|
|
|
|
6113
|
$Self->{processingStats} = [] unless exists $Self->{processingStats}; |
|
485
|
6
|
|
|
|
|
17
|
push @{ $Self->{processingStats} }, @$processingStats; |
|
|
6
|
|
|
|
|
49
|
|
|
486
|
|
|
|
|
|
|
} |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
# map the components of the subgraph to the original nodes. |
|
489
|
6
|
|
|
|
|
41
|
$Self->mapComponentsOfSubgraphToNodes($oldCompIdToNewCompIdFile, $oldCompIdToVertexFile, $Self->{outputFile}, $Self->{baseDirectory}); |
|
490
|
|
|
|
|
|
|
|
|
491
|
6
|
50
|
|
|
|
14955
|
unlink $oldCompIdToNewCompIdFile if $Self->{cleanup}; |
|
492
|
6
|
50
|
|
|
|
1266
|
unlink $oldCompIdToVertexFile if $Self->{cleanup}; |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
else |
|
495
|
|
|
|
|
|
|
{ |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
# the edges fit in memory, so just compute the components and write the results to the file. |
|
498
|
1
|
|
|
|
|
6
|
$Self->outputVertexCompId(); |
|
499
|
|
|
|
|
|
|
} |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
# store the processing stats. |
|
502
|
7
|
100
|
|
|
|
53
|
$Self->{processingStats} = [] unless exists $Self->{processingStats}; |
|
503
|
7
|
|
|
|
|
49
|
my $totalTime = $Self->getCpuTime($Self->{startTime}); |
|
504
|
7
|
|
|
|
|
45
|
push @{ $Self->{processingStats} }, { level => $Self->{level}, time => $totalTime, edges => $Self->{totalEdges} }; |
|
|
7
|
|
|
|
|
70
|
|
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# log the stats as an info message. |
|
507
|
|
|
|
|
|
|
{ |
|
508
|
7
|
|
|
|
|
16
|
my $logger = Log::Log4perl->get_logger(); |
|
|
7
|
|
|
|
|
136
|
|
|
509
|
7
|
|
|
|
|
685
|
$logger->info("processed $Self->{totalEdges} edges in $totalTime seconds; recusion level is $Self->{level}.\n"); |
|
510
|
|
|
|
|
|
|
} |
|
511
|
|
|
|
|
|
|
|
|
512
|
7
|
|
|
|
|
363
|
return $Self->{processingStats}; |
|
513
|
|
|
|
|
|
|
} |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub mapComponentsOfSubgraphToNodes |
|
516
|
|
|
|
|
|
|
{ |
|
517
|
6
|
|
|
6
|
0
|
19
|
my ($Self, $OldCompIdToNewCompIdFile, $OldCompIdToVertexFile, $VertexToNewCompIdFile, $WorkingDirectory) = @_; |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# get the delimiter to use for the records. |
|
520
|
6
|
|
|
|
|
16
|
my $delimiter = $Self->{delimiter}; |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# create the sorter to merge the OldCompId-NewCompId and OldCompId-Vertex edges. |
|
523
|
6
|
|
|
|
|
131
|
my $mergeSorter = Sort::External->new(mem_threshold => 64 * 1024 * 1024, working_dir => $WorkingDirectory); |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
{ |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# open the file $OldCompIdToNewCompIdFile to read each of the compComp edges. |
|
528
|
6
|
|
|
|
|
5889
|
my $oldCompIdToNewCompIdFileHandle; |
|
|
6
|
|
|
|
|
13
|
|
|
529
|
6
|
50
|
|
|
|
690
|
unless (open($oldCompIdToNewCompIdFileHandle, '<', $OldCompIdToNewCompIdFile)) |
|
530
|
|
|
|
|
|
|
{ |
|
531
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
532
|
0
|
|
|
|
|
0
|
$logger->logdie("could not open file '$OldCompIdToNewCompIdFile' for reading: $!\n"); |
|
533
|
|
|
|
|
|
|
} |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
# set the delimiter for the oldCompId-NewCompId so the pairs are first when sorted. |
|
536
|
6
|
|
|
|
|
21
|
my $oldCompIdToNewCompIdDelimiter = $delimiter . 'cc' . $delimiter; |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
# cache the previous string to test for skipping of duplicates. |
|
539
|
6
|
|
|
|
|
13
|
my $previousOldCompIdToNewCompIdString = ''; |
|
540
|
6
|
|
|
|
|
356
|
while (defined(my $oldCompIdToNewCompIdString = <$oldCompIdToNewCompIdFileHandle>)) |
|
541
|
|
|
|
|
|
|
{ |
|
542
|
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
# remove the line feed from the string. |
|
544
|
676
|
|
|
|
|
1510
|
chop $oldCompIdToNewCompIdString; |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
# convert the string to its pair of records. |
|
547
|
676
|
|
|
|
|
2615
|
my @oldCompIdToNewCompIdRecord = split(/$delimiter/, $oldCompIdToNewCompIdString); |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# make sure the strings parses into only two items. |
|
550
|
676
|
50
|
|
|
|
1519
|
if (@oldCompIdToNewCompIdRecord != 2) |
|
551
|
|
|
|
|
|
|
{ |
|
552
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
553
|
0
|
|
|
|
|
0
|
$logger->logdie("error: oldCompId to newCompId string record does not have two values.\n"); |
|
554
|
|
|
|
|
|
|
} |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# feed the oldCompId-NewCompId pairs to the sorter. |
|
557
|
676
|
100
|
66
|
|
|
5511
|
$mergeSorter->feed($oldCompIdToNewCompIdRecord[0] . $oldCompIdToNewCompIdDelimiter . $oldCompIdToNewCompIdRecord[1]) |
|
558
|
|
|
|
|
|
|
if ( ($previousOldCompIdToNewCompIdString ne $oldCompIdToNewCompIdString) |
|
559
|
|
|
|
|
|
|
&& ($oldCompIdToNewCompIdRecord[0] ne $oldCompIdToNewCompIdRecord[1])); |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# store the string. |
|
562
|
676
|
|
|
|
|
3119
|
$previousOldCompIdToNewCompIdString = $oldCompIdToNewCompIdString; |
|
563
|
|
|
|
|
|
|
} |
|
564
|
6
|
|
|
|
|
99
|
close $oldCompIdToNewCompIdFileHandle; |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
{ |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# open the file $OldCompIdToVertexFile to read each of the pairs. |
|
570
|
6
|
|
|
|
|
13
|
my $oldCompIdToVertexFileHandle; |
|
|
6
|
|
|
|
|
11
|
|
|
571
|
6
|
50
|
|
|
|
804
|
unless (open($oldCompIdToVertexFileHandle, '<', $OldCompIdToVertexFile)) |
|
572
|
|
|
|
|
|
|
{ |
|
573
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
574
|
0
|
|
|
|
|
0
|
$logger->logdie("could not open file '$OldCompIdToVertexFile' for reading: $!\n"); |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# set the delimiter for the oldCompId-Vertex so the pairs are second when sorted. |
|
578
|
6
|
|
|
|
|
275
|
my $oldCompIdToVertexDelimiter = $delimiter . 'cn' . $delimiter; |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
# cache the previous string to test for skipping of duplicates. |
|
581
|
6
|
|
|
|
|
9
|
my $previousOldCompIdToVertexString = ''; |
|
582
|
6
|
|
|
|
|
189
|
while (defined(my $oldCompIdToVertexString = <$oldCompIdToVertexFileHandle>)) |
|
583
|
|
|
|
|
|
|
{ |
|
584
|
1159
|
|
|
|
|
1472
|
chop $oldCompIdToVertexString; |
|
585
|
1159
|
|
|
|
|
3198
|
my @oldCompIdToVertexRecord = split(/$delimiter/, $oldCompIdToVertexString); |
|
586
|
1159
|
50
|
|
|
|
2427
|
if (@oldCompIdToVertexRecord != 2) |
|
587
|
|
|
|
|
|
|
{ |
|
588
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
589
|
0
|
|
|
|
|
0
|
$logger->logdie("error: oldCompId to vertex string record does not have two values.\n"); |
|
590
|
|
|
|
|
|
|
} |
|
591
|
1159
|
50
|
|
|
|
7420
|
$mergeSorter->feed($oldCompIdToVertexRecord[0] . $oldCompIdToVertexDelimiter . $oldCompIdToVertexRecord[1]) |
|
592
|
|
|
|
|
|
|
if $previousOldCompIdToVertexString ne $oldCompIdToVertexString; |
|
593
|
1159
|
|
|
|
|
4406
|
$previousOldCompIdToVertexString = $oldCompIdToVertexString; |
|
594
|
|
|
|
|
|
|
} |
|
595
|
6
|
|
|
|
|
87
|
close $oldCompIdToVertexFileHandle; |
|
596
|
|
|
|
|
|
|
} |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
# sort the edges. |
|
599
|
6
|
|
|
|
|
37
|
$mergeSorter->finish; |
|
600
|
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
{ |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# open the file to write the Vertex to NewCompId pairs. |
|
604
|
6
|
|
|
|
|
1445
|
my $vertexToNewCompIdFileHandle; |
|
|
6
|
|
|
|
|
10
|
|
|
605
|
6
|
50
|
|
|
|
1185
|
unless (open($vertexToNewCompIdFileHandle, '>', $VertexToNewCompIdFile)) |
|
606
|
|
|
|
|
|
|
{ |
|
607
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
608
|
0
|
|
|
|
|
0
|
$logger->logdie("error: could not open file '$VertexToNewCompIdFile' for writing.\n"); |
|
609
|
|
|
|
|
|
|
} |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
# get first record pair from the sorter. |
|
612
|
6
|
|
|
|
|
14
|
my $previousOldToNewOrOldVertexString = ''; |
|
613
|
6
|
|
|
|
|
29
|
my $oldToNewOrOldVertexString = $mergeSorter->fetch; |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# split the string into a record. |
|
616
|
6
|
|
|
|
|
13
|
my $oldToNewOrOldVertexRecord; |
|
617
|
|
|
|
|
|
|
my @listOfOldToNewOrOldVertexRecords; |
|
618
|
6
|
50
|
|
|
|
21
|
if (defined $oldToNewOrOldVertexString) |
|
619
|
|
|
|
|
|
|
{ |
|
620
|
6
|
|
|
|
|
45
|
$oldToNewOrOldVertexRecord = [ split(/$delimiter/, $oldToNewOrOldVertexString) ]; |
|
621
|
6
|
|
|
|
|
20
|
@listOfOldToNewOrOldVertexRecords = ($oldToNewOrOldVertexRecord); |
|
622
|
|
|
|
|
|
|
} |
|
623
|
|
|
|
|
|
|
|
|
624
|
6
|
|
|
|
|
35
|
while (defined($oldToNewOrOldVertexString = $mergeSorter->fetch)) |
|
625
|
|
|
|
|
|
|
{ |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
# convert the string to a record. |
|
628
|
1823
|
|
|
|
|
7503
|
my $oldToNewOrOldVertexRecord = [ split(/$delimiter/, $oldToNewOrOldVertexString) ]; |
|
629
|
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
# when the compId changes, process the records in the list. |
|
631
|
1823
|
100
|
|
|
|
5586
|
if ($listOfOldToNewOrOldVertexRecords[-1]->[0] ne $oldToNewOrOldVertexRecord->[0]) |
|
632
|
|
|
|
|
|
|
{ |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
# remap the oldCompId of each vertex to the newCompId and write it to the file. |
|
635
|
670
|
|
|
|
|
1583
|
$Self->mapComponentsOfSubgraphToVerticesInList(\@listOfOldToNewOrOldVertexRecords, $vertexToNewCompIdFileHandle); |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# empty the cache of records. |
|
638
|
670
|
|
|
|
|
2106
|
@listOfOldToNewOrOldVertexRecords = (); |
|
639
|
|
|
|
|
|
|
} |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
# cache the record if unique. |
|
642
|
1823
|
50
|
|
|
|
4406
|
if ($oldToNewOrOldVertexString ne $previousOldToNewOrOldVertexString) |
|
643
|
|
|
|
|
|
|
{ |
|
644
|
1823
|
|
|
|
|
2146
|
push @listOfOldToNewOrOldVertexRecords, $oldToNewOrOldVertexRecord; |
|
645
|
1823
|
|
|
|
|
13406
|
$previousOldToNewOrOldVertexString = $oldToNewOrOldVertexString; |
|
646
|
|
|
|
|
|
|
} |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
} |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
# remap the oldCompId of each vertex to the newCompId and write it to the file. |
|
651
|
6
|
|
|
|
|
146
|
$Self->mapComponentsOfSubgraphToVerticesInList(\@listOfOldToNewOrOldVertexRecords, $vertexToNewCompIdFileHandle); |
|
652
|
6
|
|
|
|
|
125
|
@listOfOldToNewOrOldVertexRecords = (); |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
# close the file. |
|
655
|
6
|
|
|
|
|
1543
|
close $vertexToNewCompIdFileHandle; |
|
656
|
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
|
|
658
|
6
|
|
|
|
|
65
|
return undef; |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
sub mapComponentsOfSubgraphToVerticesInList # (\@listOfOldToNewOrOldVertexRecords, $vertexToNewCompIdFileHandle); |
|
662
|
|
|
|
|
|
|
{ |
|
663
|
676
|
|
|
676
|
0
|
1273
|
my ($Self, $ListOfOldToNewOrOldVertexRecords, $VertexToNewCompIdFileHandle) = @_; |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
# get the string record delimiter. |
|
666
|
676
|
|
|
|
|
1080
|
my $delimiter = $Self->{delimiter}; |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
# separate the Cc and Cn records. |
|
669
|
676
|
|
|
|
|
730
|
my $totalRecords = @$ListOfOldToNewOrOldVertexRecords; |
|
670
|
676
|
|
|
|
|
646
|
my $indexOfFirstCnRecord = 0; |
|
671
|
676
|
|
|
|
|
1660
|
for ($indexOfFirstCnRecord = 0 ; $indexOfFirstCnRecord < @$ListOfOldToNewOrOldVertexRecords ; $indexOfFirstCnRecord++) |
|
672
|
|
|
|
|
|
|
{ |
|
673
|
1111
|
100
|
|
|
|
4448
|
last if ($ListOfOldToNewOrOldVertexRecords->[$indexOfFirstCnRecord][1] eq 'cn'); |
|
674
|
|
|
|
|
|
|
} |
|
675
|
676
|
|
|
|
|
928
|
my $totalCcRecords = $indexOfFirstCnRecord; |
|
676
|
676
|
|
|
|
|
731
|
my $totalCnRecords = $totalRecords - $totalCcRecords; |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
# if there are no oldCompIdToVertex records in the list, there is nothing to do. |
|
679
|
676
|
100
|
|
|
|
1546
|
if ($totalCnRecords == 0) |
|
680
|
|
|
|
|
|
|
{ |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
#my $logger = Log::Log4perl->get_logger(); |
|
683
|
|
|
|
|
|
|
#$logger->info ("info: no oldCompId to vertex records in list.\n"); |
|
684
|
235
|
|
|
|
|
425
|
return undef; |
|
685
|
|
|
|
|
|
|
} |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# if there is more than one oldCompIdToNewCompId record in the list, log the info. |
|
688
|
441
|
50
|
|
|
|
1168
|
if ($totalCcRecords > 1) |
|
689
|
|
|
|
|
|
|
{ |
|
690
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
691
|
0
|
|
|
|
|
0
|
$logger->info("info: there were $totalCcRecords oldCompId-newCompId records in the list.\n"); |
|
692
|
|
|
|
|
|
|
} |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# if $totalCcRecords is zero, there are no oldCompIdToNewCompId mapping records, |
|
695
|
|
|
|
|
|
|
# so then just add the oldCompIdToVertex records to the file. |
|
696
|
441
|
100
|
|
|
|
670
|
if ($totalCcRecords == 0) |
|
697
|
|
|
|
|
|
|
{ |
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# write each node,comp record to $VertexToNewCompIdFileHandle. |
|
700
|
6
|
|
|
|
|
14
|
my $previousRecord = ''; |
|
701
|
6
|
|
|
|
|
21
|
for (my $i = $indexOfFirstCnRecord ; $i < $totalRecords ; $i++) |
|
702
|
|
|
|
|
|
|
{ |
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
# convert the record to a string. |
|
705
|
36
|
|
|
|
|
70
|
my $recordString = $ListOfOldToNewOrOldVertexRecords->[$i][2] . $delimiter . $ListOfOldToNewOrOldVertexRecords->[$i][0]; |
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
# skip duplicate records. |
|
708
|
36
|
50
|
|
|
|
69
|
next if $previousRecord eq $recordString; |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
# print the record. |
|
711
|
36
|
|
|
|
|
212
|
print $VertexToNewCompIdFileHandle $recordString . $el; |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# store a copy of the record to remove duplicates. |
|
714
|
36
|
|
|
|
|
85
|
$previousRecord = $recordString; |
|
715
|
|
|
|
|
|
|
} |
|
716
|
|
|
|
|
|
|
} |
|
717
|
|
|
|
|
|
|
else |
|
718
|
|
|
|
|
|
|
{ |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
# get the newCompId. |
|
721
|
435
|
|
|
|
|
592
|
my $newCompId = $ListOfOldToNewOrOldVertexRecords->[0][2]; |
|
722
|
435
|
|
|
|
|
505
|
my $previousRecord = ''; |
|
723
|
|
|
|
|
|
|
|
|
724
|
435
|
|
|
|
|
1126
|
for (my $i = $indexOfFirstCnRecord ; $i < $totalRecords ; $i++) |
|
725
|
|
|
|
|
|
|
{ |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
# convert the record to a string. |
|
728
|
1123
|
|
|
|
|
2673
|
my $recordString = $ListOfOldToNewOrOldVertexRecords->[$i][2] . $delimiter . $newCompId; |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
# skip duplicate records. |
|
731
|
1123
|
50
|
|
|
|
2222
|
next if $previousRecord eq $recordString; |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
# print the record. |
|
734
|
1123
|
|
|
|
|
2116
|
print $VertexToNewCompIdFileHandle $recordString . $el; |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
# store a copy of the record to remove duplicates. |
|
737
|
1123
|
|
|
|
|
2982
|
$previousRecord = $recordString; |
|
738
|
|
|
|
|
|
|
} |
|
739
|
|
|
|
|
|
|
} |
|
740
|
|
|
|
|
|
|
|
|
741
|
441
|
|
|
|
|
1026
|
return undef; |
|
742
|
|
|
|
|
|
|
} |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
sub writeSubgraphInfoToFiles |
|
745
|
|
|
|
|
|
|
{ |
|
746
|
6
|
|
|
6
|
0
|
14
|
my ($Self, $OldCompIdToOldSorter, $CompIdVertexFile) = @_; |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
# open the oldCompId to vertex file for writing. |
|
749
|
6
|
|
|
|
|
9
|
my $oldCompIdToVertexFileHandle; |
|
750
|
6
|
50
|
|
|
|
376
|
unless (open($oldCompIdToVertexFileHandle, '>', $CompIdVertexFile)) |
|
751
|
|
|
|
|
|
|
{ |
|
752
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
753
|
0
|
|
|
|
|
0
|
$logger->logdie("could not open file '$CompIdVertexFile' for writing.\n"); |
|
754
|
|
|
|
|
|
|
} |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# get the vertex component-id sorter. |
|
757
|
6
|
|
|
|
|
18
|
my $vertexCompIdSorter = $Self->{vertexCompIdSorter}; |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
# counts the number of edges in the subgraph generated ($OldCompIdToOldCompIdFile) |
|
760
|
6
|
|
|
|
|
16
|
my $totalSubgraphEdges = 0; |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
# used to skip duplicated vertexCompId edges. |
|
763
|
6
|
|
|
|
|
22
|
my $previousVertexCompIdString = ''; |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
# get the first vertexCompId as a string. |
|
766
|
6
|
|
|
|
|
30
|
my $vertexCompIdString = $vertexCompIdSorter->fetch; |
|
767
|
6
|
50
|
|
|
|
72
|
my $vertexCompIdPair = [ split($Self->{delimiter}, $vertexCompIdString) ] if defined $vertexCompIdString; |
|
768
|
6
|
|
|
|
|
15
|
my @listOfVertexCompIdPairs = ($vertexCompIdPair); |
|
769
|
6
|
|
|
|
|
34
|
while (defined($vertexCompIdString = $vertexCompIdSorter->fetch)) |
|
770
|
|
|
|
|
|
|
{ |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
# extract the vertex and component id from the string. |
|
773
|
3275
|
|
|
|
|
10417
|
my $vertexCompIdPair = [ split($Self->{delimiter}, $vertexCompIdString) ]; |
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
# when the vertex changes the pairs in @listOfVertexCompIdPairs are used |
|
776
|
|
|
|
|
|
|
# to create part of the subgraph. |
|
777
|
3275
|
100
|
|
|
|
8563
|
if ($listOfVertexCompIdPairs[-1]->[0] ne $vertexCompIdPair->[0]) |
|
778
|
|
|
|
|
|
|
{ |
|
779
|
1153
|
|
|
|
|
2662
|
$Self->writeSubgraphInfoToFilesFromList(\@listOfVertexCompIdPairs, $OldCompIdToOldSorter, |
|
780
|
|
|
|
|
|
|
$oldCompIdToVertexFileHandle, \$totalSubgraphEdges); |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
# clear the list of pairs. |
|
783
|
1153
|
|
|
|
|
2629
|
@listOfVertexCompIdPairs = (); |
|
784
|
|
|
|
|
|
|
} |
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
# only store unique pairs. |
|
787
|
3275
|
100
|
|
|
|
11929
|
if ($previousVertexCompIdString ne $vertexCompIdString) |
|
788
|
|
|
|
|
|
|
{ |
|
789
|
2145
|
|
|
|
|
2553
|
push @listOfVertexCompIdPairs, $vertexCompIdPair; |
|
790
|
2145
|
|
|
|
|
8410
|
$previousVertexCompIdString = $vertexCompIdString; |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
} |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
# process any remaining pairs in @listOfVertexCompIdPairs. |
|
795
|
6
|
|
|
|
|
164
|
$Self->writeSubgraphInfoToFilesFromList(\@listOfVertexCompIdPairs, $OldCompIdToOldSorter, |
|
796
|
|
|
|
|
|
|
$oldCompIdToVertexFileHandle, \$totalSubgraphEdges); |
|
797
|
6
|
|
|
|
|
18
|
@listOfVertexCompIdPairs = (); |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
# done with the sorter. |
|
800
|
6
|
|
|
|
|
24
|
delete $Self->{vertexCompIdSorter}; |
|
801
|
|
|
|
|
|
|
|
|
802
|
6
|
|
|
|
|
70
|
return undef; |
|
803
|
|
|
|
|
|
|
} |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
sub writeSubgraphInfoToFilesFromList |
|
806
|
|
|
|
|
|
|
{ |
|
807
|
1159
|
|
|
1159
|
0
|
1793
|
my ($Self, $ListOfVertexCompIdPairs, $OldCompIdToOldSorter, $OldCompIdToVertexFileHandle, $TotalSubgraphEdges) = @_; |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
# the first record has the minimum component id. |
|
810
|
1159
|
|
|
|
|
1650
|
my $minCompId = $ListOfVertexCompIdPairs->[0]->[1]; |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
# compute the edges of the subgraph. |
|
813
|
1159
|
|
|
|
|
1201
|
my @listOfSubgraphEdgesA; |
|
814
|
|
|
|
|
|
|
my @listOfSubgraphEdgesB; |
|
815
|
1159
|
|
|
|
|
2806
|
for (my $i = 1 ; $i < @$ListOfVertexCompIdPairs ; $i++) |
|
816
|
|
|
|
|
|
|
{ |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
# get the component-id for the pair. |
|
819
|
992
|
|
|
|
|
1395
|
my $compId = $ListOfVertexCompIdPairs->[$i]->[1]; |
|
820
|
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
# add the new edge. |
|
822
|
992
|
|
|
|
|
2148
|
push @listOfSubgraphEdgesA, join($Self->{delimiter}, $minCompId, $compId); |
|
823
|
992
|
|
|
|
|
1431
|
++$$TotalSubgraphEdges; |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
# we need to add the symmetric edge also to ensure log convergence. |
|
826
|
992
|
100
|
|
|
|
1878
|
if ($minCompId ne $compId) |
|
827
|
|
|
|
|
|
|
{ |
|
828
|
986
|
|
|
|
|
1853
|
push @listOfSubgraphEdgesB, join($Self->{delimiter}, $compId, $minCompId); |
|
829
|
986
|
|
|
|
|
2989
|
++$$TotalSubgraphEdges; |
|
830
|
|
|
|
|
|
|
} |
|
831
|
|
|
|
|
|
|
} |
|
832
|
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
# write the edges to the file. |
|
834
|
1159
|
|
|
|
|
1821
|
push @listOfSubgraphEdgesA, sort @listOfSubgraphEdgesB; |
|
835
|
1159
|
|
|
|
|
1541
|
@listOfSubgraphEdgesB = (); |
|
836
|
1159
|
|
|
|
|
5562
|
$OldCompIdToOldSorter->feed (@listOfSubgraphEdgesA); |
|
837
|
1159
|
|
|
|
|
1909
|
@listOfSubgraphEdgesA = (); |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
# write the compId,vertex to the file. |
|
840
|
1159
|
|
|
|
|
2620
|
my $record = join($Self->{delimiter}, $minCompId, $ListOfVertexCompIdPairs->[0]->[0]) . $el; |
|
841
|
1159
|
|
|
|
|
8227
|
print $OldCompIdToVertexFileHandle $record; |
|
842
|
|
|
|
|
|
|
|
|
843
|
1159
|
|
|
|
|
2287
|
return undef; |
|
844
|
|
|
|
|
|
|
} |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
sub outputVertexCompId |
|
847
|
|
|
|
|
|
|
{ |
|
848
|
1
|
|
|
1
|
0
|
3
|
my ($Self, %Parameters) = @_; |
|
849
|
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
# set the default delimiter. |
|
851
|
1
|
|
|
|
|
3
|
my $delimiter = $Self->{delimiter}; |
|
852
|
1
|
50
|
|
|
|
4
|
$delimiter = $Parameters{delimiter} if exists $Parameters{delimiter}; |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
# get the list of vertices and component ids. |
|
855
|
1
|
|
|
|
|
5
|
my $listOfVertexCompIds = $Self->{componenter}->get_vertexCompIdPairs(0); |
|
856
|
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
# sort the list of vertices and component ids. |
|
858
|
1
|
50
|
|
|
|
4
|
$listOfVertexCompIds = [ sort { ($a->[0] cmp $b->[0]) || $a->[1] cmp $b->[1] } @$listOfVertexCompIds ]; |
|
|
9
|
|
|
|
|
20
|
|
|
859
|
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
# open the component file for writing. |
|
861
|
1
|
|
|
|
|
2
|
my $outputFh; |
|
862
|
1
|
50
|
|
1
|
|
11
|
unless (open($outputFh, '>:encoding(utf8)', $Self->{outputFile})) |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
9
|
|
|
|
1
|
|
|
|
|
55
|
|
|
863
|
|
|
|
|
|
|
{ |
|
864
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
|
865
|
0
|
|
|
|
|
0
|
$logger->logdie("could not open file '$Self->{outputFile}' for writing.\n"); |
|
866
|
|
|
|
|
|
|
} |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
# write the vertex compId to the file. |
|
869
|
1
|
|
|
|
|
72243
|
foreach my $vertexCompId (@$listOfVertexCompIds) |
|
870
|
|
|
|
|
|
|
{ |
|
871
|
6
|
|
|
|
|
38
|
print $outputFh $vertexCompId->[0] . $delimiter . $vertexCompId->[1] . $el; |
|
872
|
|
|
|
|
|
|
} |
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
# close the output file of edges. |
|
875
|
1
|
|
|
|
|
749
|
close $outputFh; |
|
876
|
|
|
|
|
|
|
|
|
877
|
1
|
|
|
|
|
19
|
return undef; |
|
878
|
|
|
|
|
|
|
} |
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
sub printSorter |
|
881
|
|
|
|
|
|
|
{ |
|
882
|
0
|
|
|
0
|
0
|
0
|
my ($Self, $Sorter) = @_; |
|
883
|
|
|
|
|
|
|
|
|
884
|
0
|
|
|
|
|
0
|
my $previousRecord = ''; |
|
885
|
0
|
|
|
|
|
0
|
while (defined(my $recordString = $Sorter->fetch)) |
|
886
|
|
|
|
|
|
|
{ |
|
887
|
0
|
0
|
|
|
|
0
|
next if $previousRecord eq $recordString; |
|
888
|
0
|
|
|
|
|
0
|
$previousRecord = $recordString; |
|
889
|
0
|
|
|
|
|
0
|
print $recordString . $el; |
|
890
|
|
|
|
|
|
|
} |
|
891
|
|
|
|
|
|
|
|
|
892
|
0
|
|
|
|
|
0
|
return undef; |
|
893
|
|
|
|
|
|
|
} |
|
894
|
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
sub getCpuTime # ($startTime) |
|
896
|
|
|
|
|
|
|
{ |
|
897
|
14
|
|
|
14
|
0
|
27
|
my $startTime = 0; |
|
898
|
14
|
100
|
|
|
|
57
|
$startTime = $_[1] if exists $_[1]; |
|
899
|
14
|
|
|
|
|
221
|
return Time::HiRes::clock() - $startTime; |
|
900
|
|
|
|
|
|
|
} |
|
901
|
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
sub DESTROY |
|
903
|
|
|
|
|
|
|
{ |
|
904
|
7
|
|
|
7
|
|
3312
|
my ($Self) = @_; |
|
905
|
7
|
100
|
|
|
|
395
|
return undef if $Self->{level} > 0; |
|
906
|
1
|
50
|
|
|
|
4
|
return undef unless $Self->{cleanup}; |
|
907
|
1
|
50
|
|
|
|
5
|
return undef unless exists $Self->{baseDirectory}; |
|
908
|
1
|
50
|
|
|
|
30
|
return undef unless -e $Self->{baseDirectory}; |
|
909
|
1
|
|
|
|
|
525
|
remove_tree($Self->{baseDirectory}); |
|
910
|
|
|
|
|
|
|
|
|
911
|
1
|
50
|
|
|
|
285
|
return undef unless $Self->{unlinkWorkingDirectory}; |
|
912
|
0
|
|
|
|
|
|
remove_tree($Self->{workingDirectory}); |
|
913
|
0
|
|
|
|
|
|
return undef; |
|
914
|
|
|
|
|
|
|
} |
|
915
|
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
=head1 INSTALLATION |
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
Use L to install the module and all its prerequisites: |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
perl -MCPAN -e shell |
|
921
|
|
|
|
|
|
|
cpan[1]> install Graph::Undirected::Components |
|
922
|
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=head1 BUGS |
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
Please email bugs reports or feature requests to C, or through |
|
926
|
|
|
|
|
|
|
the web interface at L. The author |
|
927
|
|
|
|
|
|
|
will be notified and you can be automatically notified of progress on the bug fix or feature request. |
|
928
|
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=head1 AUTHOR |
|
930
|
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
Jeff Kubina |
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
934
|
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
Copyright (c) 2009 Jeff Kubina. All rights reserved. |
|
936
|
|
|
|
|
|
|
This program is free software; you can redistribute |
|
937
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
The full text of the license can be found in the |
|
940
|
|
|
|
|
|
|
LICENSE file included with this module. |
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
=head1 KEYWORDS |
|
943
|
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
connected components, network, undirected graph |
|
945
|
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
947
|
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
L, L, L, L |
|
949
|
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
=begin html |
|
951
|
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
connected component, |
|
953
|
|
|
|
|
|
|
graph, |
|
954
|
|
|
|
|
|
|
network, |
|
955
|
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=end html |
|
957
|
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=cut |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
1; |
|
961
|
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
# The preceding line will help the module return a true value |