| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Data::Range::Compare::Stream::Iterator::File::MergeSortAsc; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# based on http://en.wikipedia.org/wiki/Merge_sort |
|
4
|
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
1461
|
use strict; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
55
|
|
|
6
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
43
|
|
|
7
|
2
|
|
|
2
|
|
8
|
use Carp qw(croak); |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
85
|
|
|
8
|
2
|
|
|
2
|
|
853
|
use IO::File; |
|
|
2
|
|
|
|
|
10217
|
|
|
|
2
|
|
|
|
|
344
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
12
|
use Data::Range::Compare::Stream::Sort; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
146
|
|
|
11
|
2
|
|
|
2
|
|
1229
|
use Data::Range::Compare::Stream::Iterator::Stack; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
45
|
|
|
12
|
2
|
|
|
2
|
|
10
|
use Data::Range::Compare::Stream::Iterator::Array; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
37
|
|
|
13
|
2
|
|
|
2
|
|
1193
|
use Data::Range::Compare::Stream::Iterator::File::MergeSortAsc::Stack; |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
67
|
|
|
14
|
2
|
|
|
2
|
|
627
|
use Data::Range::Compare::Stream::Iterator::File; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
114
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
2
|
|
|
2
|
|
12
|
use base qw(Data::Range::Compare::Stream::Iterator::Base Data::Range::Compare::Stream::Iterator::File::Temp); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
3261
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Constructor.
#
# Takes name => value pairs; caller-supplied values override the defaults
# listed in the SUPER::new call below.  Arguments read by this class:
#   bucket_size        - must be >= 1; limits the in-memory buckets filled
#                        by create_stack (default 4000)
#   filename           - a single input file, appended to file_list
#   file_list          - array ref of input file names
#   iterator_list      - array ref of iterators to consume; one file iterator
#                        per file_list entry is pushed onto it
#   auto_prepare       - when true, prepare() is called before returning
#   unlink_result_file - when true, DESTROY unlinks the result file
#
# Croaks when bucket_size < 1 or when a file iterator reports in_error.
# Returns the new object.
sub new {
  my ($class,%args)=@_;

  my $self=$class->SUPER::new(
    bucket_size=>4000,
    NEW_ITERATOR_FROM=>'Data::Range::Compare::Stream::Iterator::File',
    NEW_ARRAY_ITERATOR_FROM=>'Data::Range::Compare::Stream::Iterator::Array',
    NEW_FROM=>'Data::Range::Compare::Stream',
    file_list=>[],
    iterator_list=>[],
    auto_prepare=>0,
    unlink_result_file=>1,
    prepared=>0,
    %args
  );

  croak 'bucket_size < 1' if $self->{bucket_size} < 1;

  # Work on a private copy of the file list so that appending "filename"
  # never modifies an array ref handed in by the caller.
  my @file_list=@{$self->{file_list} || []};

  # Test for defined/non-empty rather than truth so a file named "0" works.
  if(defined($args{filename}) and length($args{filename})) {
    push @file_list,$args{filename};
  }
  $self->{file_list}=\@file_list;

  my $it_list=$self->{iterator_list};
  foreach my $filename (@file_list) {
    my $it=$self->create_file_iterator(filename=>$filename);
    croak "Could not open: [$filename]" if $it->in_error;
    push @$it_list,$it;
  }

  $self->prepare if $self->{auto_prepare};

  return $self;
}
|
52
|
|
|
|
|
|
|
|
|
53
|
122
|
|
|
122
|
1
|
1531
|
# Accessor: returns the value stored under the NEW_FROM key of the object.
sub NEW_FROM {
  my ($self)=@_;
  return $self->{NEW_FROM};
}
|
54
|
|
|
|
|
|
|
|
|
55
|
97
|
|
|
97
|
1
|
1297
|
# Accessor: returns the value stored under the NEW_ARRAY_ITERATOR_FROM key
# of the object.
sub NEW_ARRAY_ITERATOR_FROM {
  my ($self)=@_;
  return $self->{NEW_ARRAY_ITERATOR_FROM};
}
|
56
|
|
|
|
|
|
|
|
|
57
|
112
|
|
|
112
|
1
|
499
|
# Accessor: returns the value stored under the NEW_ITERATOR_FROM key of the
# object.
sub NEW_ITERATOR_FROM {
  my ($self)=@_;
  return $self->{NEW_ITERATOR_FROM};
}
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# Builds an iterator by calling ->new on the class named by
# NEW_ITERATOR_FROM.
#
# The object's NEW_FROM, parse_line, result_to_line and factory_instance
# values are passed first, followed by the caller's name => value pairs
# (for example filename => ... or fh => ...).
# Returns the newly constructed iterator.
sub create_file_iterator {
  my ($self,%args)=@_;

  my $iterator_class=$self->NEW_ITERATOR_FROM;
  my @shared_settings=(
    NEW_FROM=>$self->NEW_FROM,
    parse_line=>$self->{parse_line},
    result_to_line=>$self->{result_to_line},
    factory_instance=>$self->{factory_instance},
  );

  my $iterator=$iterator_class->new(@shared_settings,%args);
  return $iterator;
}
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Runs the sort once: builds the stack from iterator_list, walks it down to
# a single result file and opens a file iterator on that result.
#
# Does nothing when the object is already flagged as prepared.  The flag is
# set before any work is done.
sub prepare {
  my ($self)=@_;

  if($self->{prepared}) {
    return;
  }
  $self->{prepared}=1;

  $self->create_stack($self->{iterator_list});
  $self->{result_file}=$self->walk_stack;

  # The stack is dropped once walk_stack has returned the result file.
  $self->{stack}=undef;

  $self->{iterator}=$self->create_file_iterator(filename=>$self->{result_file});
}
|
86
|
|
|
|
|
|
|
|
|
87
|
3
|
|
|
3
|
0
|
11
|
# Calls prepare() and then returns the value stored under result_file.
sub get_result_file {
  my ($self)=@_;
  $self->prepare;
  return $self->{result_file};
}
|
|
3
|
|
|
|
|
78
|
|
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# Destructor: removes files this object still refers to.
#
# - When result_file is defined, the iterator is released and the file is
#   unlinked if unlink_result_file is true.
# - When a stack is still present, every entry returned by it is unlinked
#   and the stack is released.
sub DESTROY {
  my ($self)=@_;
  return unless defined($self);

  # A destructor can run at any point, including while an exception is
  # propagating.  Localize the global status variables so that unlink and
  # the method calls below cannot clobber the caller's error state.
  local($., $@, $!, $^E, $?);

  if(defined($self->{result_file})) {
    # Release the iterator before removing the file it was created for.
    $self->{iterator}=undef;
    unlink $self->{result_file} if $self->{unlink_result_file};
  }

  if(defined($self->{stack})) {
    my $stack=$self->{stack};
    while($stack->has_next) {
      unlink $stack->get_next;
    }
    $self->{stack}=undef;
  }
}
|
106
|
|
|
|
|
|
|
|
|
107
|
8252
|
|
|
8252
|
0
|
50429
|
# Calls prepare() and then returns whatever the result iterator's has_next
# returns.
sub has_next {
  my ($self)=@_;
  $self->prepare;
  return $self->{iterator}->has_next;
}
|
|
8252
|
|
|
|
|
37299
|
|
|
108
|
8244
|
50
|
|
8244
|
1
|
43197
|
# Returns the next value from the result iterator.
# Returns undef when no iterator is defined; prepare() is not called here.
sub get_next {
  my ($self)=@_;

  my $iterator=$self->{iterator};
  if(defined($iterator)) {
    return $iterator->get_next;
  }

  return undef;
}
|
|
8244
|
|
|
|
|
30814
|
|
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# Repeatedly merges pairs of files from the stack until the loop condition
# no longer holds, then returns the next entry from the stack.
#
# For each pair: both files are opened for reading, merged into a new file
# via merge(), the merged file name is pushed back onto the stack, and the
# two inputs are closed and unlinked.
#
# Croaks when one of the files taken from the stack cannot be opened.
sub walk_stack {
  my ($self)=@_;

  my $stack=$self->{stack};

  # NOTE(review): the loop runs only while has_next returns exactly 2;
  # presumably the stack class reports 2 when at least two files remain.
  # Confirm against the stack implementation.
  while($stack->has_next==2) {
    my $left=$stack->get_next;
    my $right=$stack->get_next;

    # Fail early with the file name; an unchecked failure would only show
    # up later as a method call on an undefined handle.
    my $left_fh=IO::File->new($left,'r')
      or croak "Could not open: [$left], error was: $!";
    my $right_fh=IO::File->new($right,'r')
      or croak "Could not open: [$right], error was: $!";

    my $it_left=$self->create_file_iterator(fh=>$left_fh);
    my $it_right=$self->create_file_iterator(fh=>$right_fh);

    $stack->push($self->merge($it_left,$it_right));

    $right_fh->close;
    $left_fh->close;
    unlink $right,$left;
  }

  my $next=$stack->get_next;
  return $next;
}
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Reads every value from the iterators in $list, sorts them in memory in
# buckets and writes the sorted buckets to temp files.  The temp file names
# are pushed onto a new stack object stored in $self->{stack}.
#
# Arguments:
#   $list - array ref handed to the iterator stack as its "stack" argument
#
# Two buckets (left/right) are filled in turn.  When both are full they are
# sorted, merged into one temp file via merge(), and the value that did not
# fit starts a new left bucket.  Whatever remains in the buckets at the end
# is sorted and written out, one temp file per non-empty bucket.
sub create_stack {
  my ($self,$list)=@_;

  # Class->new is used instead of the indirect-object form "new Class(...)":
  # this package defines its own "new", which makes the indirect form
  # depend on parser heuristics.
  my $merged=Data::Range::Compare::Stream::Iterator::File::MergeSortAsc::Stack->new(tmpdir=>$self->{tmpdir});
  $self->{stack}=$merged;

  my $stack=Data::Range::Compare::Stream::Iterator::Stack->new(stack=>$list);

  my $load_count=$self->{bucket_size};

  my $left=[];
  my $right=[];

  while($stack->has_next) {
    my $result=$stack->get_next;

    # NOTE(review): $#$array is the last index, so each bucket accepts
    # bucket_size + 1 values before it counts as full.  Kept as-is; confirm
    # whether that is intended.
    if($#$left < $load_count) {
      push @$left,$result;
    } else {
      if($#$right < $load_count) {
        push @$right,$result;
      } else {
        @$left=sort { $self->sort_method($a,$b) } @$left;
        @$right=sort { $self->sort_method($a,$b) } @$right;

        my $it_left=$self->NEW_ARRAY_ITERATOR_FROM->new(factory_instance=>$self->{factory_instance},sorted=>1,range_list=>$left);
        my $it_right=$self->NEW_ARRAY_ITERATOR_FROM->new(factory_instance=>$self->{factory_instance},sorted=>1,range_list=>$right);

        $merged->push($self->merge($it_left,$it_right));

        # Start over: the current value becomes the first entry of the
        # next left bucket.
        @$right=();
        @$left=($result);
      }
    }
  }

  # Flush whatever is left in the buckets, one temp file per bucket.
  foreach my $array ($left,$right) {
    next if $#$array==-1;

    @$array=sort { $self->sort_method($a,$b) } @$array;

    my $it=$self->NEW_ARRAY_ITERATOR_FROM->new(factory_instance=>$self->{factory_instance},sorted=>1,range_list=>$array);
    my $tmp=$self->get_temp;

    while($it->has_next) {
      my $result=$it->get_next;
      $tmp->print($self->result_to_line($result));
    }

    $merged->push($tmp->filename);
    $tmp->close;
  }
}
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Compares two range objects and returns the comparison result.
#
# When the object carries a true sort_func value, that code ref is called
# with the two ranges and its result is returned.  Otherwise the get_common
# values of both ranges are passed to sort_in_consolidate_order_asc.
sub sort_method {
  my ($self,$left_range,$right_range)=@_;

  my $custom_sort=$self->{sort_func};
  if($custom_sort) {
    return $custom_sort->($left_range,$right_range);
  }

  my $order=sort_in_consolidate_order_asc($left_range->get_common,$right_range->get_common);
  return $order;
}
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Merges two sorted iterators into one temp file.
#
# Arguments:
#   $left, $right - iterators providing has_next/get_next
#
# Values are written with result_to_line in the order given by sort_method.
# On a tie the left value is written first.  An undefined value returned by
# get_next is treated as the end of that side.
#
# Returns the file name of the temp file holding the merged output.
sub merge {
  my ($self,$left,$right)=@_;

  my $tmp_result=$self->get_temp;

  my $left_range=$left->has_next ? $left->get_next : undef;
  my $right_range=$right->has_next ? $right->get_next : undef;

  while(defined($left_range) or defined($right_range)) {

    my $take_left;
    if(defined($left_range) and defined($right_range)) {
      # Any positive comparison result means "left sorts after right", the
      # same convention Perl's sort uses.  Testing for exactly 1 would
      # misorder output for comparators that return larger values.
      my $cmp=$self->sort_method($left_range,$right_range);
      $take_left=($cmp<=0);
    } else {
      # Only one side still has data: drain it.
      $take_left=defined($left_range);
    }

    if($take_left) {
      $tmp_result->print($self->result_to_line($left_range));
      $left_range=$left->has_next ? $left->get_next : undef;
    } else {
      $tmp_result->print($self->result_to_line($right_range));
      $right_range=$right->has_next ? $right->get_next : undef;
    }
  }

  my $result_name=$tmp_result->filename;
  $tmp_result->close;

  return $result_name;
}
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
1; |