line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Parse::ExuberantCTags::Merge; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
47439
|
use 5.006001; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
63
|
|
4
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
14
|
|
|
2
|
|
|
|
|
64
|
|
5
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
21
|
|
|
2
|
|
|
|
|
115
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '1.01'; |
8
|
2
|
|
|
2
|
|
10
|
use constant DEBUG => 0; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
174
|
|
9
|
|
|
|
|
|
|
|
10
|
2
|
|
|
2
|
|
10
|
use constant SMALL_DEFAULT => 2**22; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
84
|
|
11
|
2
|
|
|
2
|
|
10
|
use constant SUPER_SMALL_DEFAULT => 2**17; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
87
|
|
12
|
|
|
|
|
|
|
|
13
|
2
|
|
|
2
|
|
11
|
use constant FILENAME => 0; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
90
|
|
14
|
2
|
|
|
2
|
|
7
|
use constant SORTED => 1; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
152
|
|
15
|
|
|
|
|
|
|
|
16
|
2
|
|
|
2
|
|
8
|
use constant MRG_LINE => 0; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
92
|
|
17
|
2
|
|
|
2
|
|
20
|
use constant MRG_FH => 1; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
133
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use Class::XSAccessor |
20
|
2
|
|
|
|
|
18
|
constructor => 'new', |
21
|
|
|
|
|
|
|
accessors => { |
22
|
|
|
|
|
|
|
small_size_threshold => 'small_size_threshold', |
23
|
|
|
|
|
|
|
super_small_size_threshold => 'super_small_size_threshold', |
24
|
|
|
|
|
|
|
tempdir => 'tempdir', |
25
|
2
|
|
|
2
|
|
1669
|
}; |
|
2
|
|
|
|
|
5819
|
|
26
|
|
|
|
|
|
|
|
27
|
2
|
|
|
2
|
|
568
|
use Carp (); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
29
|
|
28
|
2
|
|
|
2
|
|
2421
|
use File::Temp (); |
|
2
|
|
|
|
|
54941
|
|
|
2
|
|
|
|
|
58
|
|
29
|
2
|
|
|
2
|
|
37
|
use File::Spec (); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
33
|
|
30
|
2
|
|
|
2
|
|
2314
|
use Parse::ExuberantCTags::Merge::SimpleScopeGuard; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
3785
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub add_file { |
33
|
21
|
|
|
21
|
1
|
14360
|
my $self = shift; |
34
|
21
|
|
|
|
|
38
|
my $file = shift; |
35
|
21
|
50
|
|
|
|
55
|
Carp::croak("Need file argument") |
36
|
|
|
|
|
|
|
if not defined $file; |
37
|
21
|
50
|
|
|
|
319
|
Carp::croak("Input file '$file' does not exist") |
38
|
|
|
|
|
|
|
if not -f $file; |
39
|
|
|
|
|
|
|
|
40
|
21
|
|
|
|
|
91
|
my %opts = @_; |
41
|
21
|
|
|
|
|
34
|
my $sorted = $opts{sorted}; |
42
|
|
|
|
|
|
|
|
43
|
21
|
|
100
|
|
|
93
|
$self->{files} ||= []; |
44
|
21
|
|
|
|
|
28
|
push @{$self->{files}}, [$file, $sorted]; |
|
21
|
|
|
|
|
58
|
|
45
|
21
|
|
|
|
|
103
|
return(); |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
sub write { |
50
|
12
|
|
|
12
|
0
|
696
|
my $self = shift; |
51
|
12
|
|
|
|
|
18
|
my $outfile = shift; |
52
|
12
|
50
|
|
|
|
29
|
Carp::croak("Need output file argument") |
53
|
|
|
|
|
|
|
if not defined $outfile; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# determine temporary directory |
56
|
12
|
|
|
|
|
30
|
my $tmpdir = $self->tempdir; |
57
|
12
|
50
|
33
|
|
|
39
|
if (not defined $tmpdir or not -d $tmpdir) { |
58
|
12
|
|
|
|
|
188
|
$tmpdir = File::Spec->tmpdir(); |
59
|
12
|
|
|
|
|
34
|
$self->tempdir($tmpdir); |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
12
|
|
|
|
|
53
|
my $total_size = 0; |
63
|
12
|
|
|
|
|
17
|
my $sorted_size = 0; |
64
|
12
|
|
|
|
|
16
|
my $unsorted_size = 0; |
65
|
12
|
|
|
|
|
18
|
my @sorted; |
66
|
|
|
|
|
|
|
my @unsorted; |
67
|
|
|
|
|
|
|
|
68
|
12
|
|
|
|
|
26
|
my $files = $self->{files}; |
69
|
12
|
50
|
33
|
|
|
61
|
Carp::croak("Need input files") |
70
|
|
|
|
|
|
|
if not defined $files or @$files == 0; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# only one sorted input file => copy |
73
|
12
|
100
|
100
|
|
|
53
|
if (@$files == 1 and $files->[0][SORTED]) { |
74
|
1
|
|
|
|
|
2
|
warn "Only one sorted input file => copying" if DEBUG; |
75
|
1
|
|
|
|
|
3
|
my $infile = $files->[0][FILENAME]; |
76
|
1
|
50
|
|
|
|
43
|
open my $fh, '<', $infile |
77
|
|
|
|
|
|
|
or die "Opening input file '$infile' for reading failed: $!"; |
78
|
1
|
50
|
|
|
|
65
|
open my $ofh, '>', $outfile |
79
|
|
|
|
|
|
|
or die "Opening output file '$outfile' for writing failed: $!"; |
80
|
|
|
|
|
|
|
|
81
|
1
|
|
|
|
|
16
|
print $ofh "!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted/\n"; |
82
|
|
|
|
|
|
|
|
83
|
1
|
|
|
|
|
5
|
local $/ = \1000000; |
84
|
1
|
|
|
|
|
60
|
while (<$fh>) { |
85
|
1
|
|
|
|
|
10
|
print $ofh $_; |
86
|
|
|
|
|
|
|
} |
87
|
1
|
|
|
|
|
20
|
close $fh; |
88
|
1
|
|
|
|
|
92
|
close $ofh; |
89
|
1
|
|
|
|
|
10
|
return(1); |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# calculate the file sizes |
93
|
11
|
|
|
|
|
26
|
foreach my $file (@$files) { |
94
|
20
|
|
|
|
|
28
|
my $fname = $file->[FILENAME]; |
95
|
20
|
|
|
|
|
228
|
my $s = -s $fname; |
96
|
20
|
|
|
|
|
26
|
$total_size += $s; |
97
|
20
|
100
|
|
|
|
46
|
if ($file->[SORTED]) { |
98
|
9
|
|
|
|
|
14
|
$sorted_size += $s; |
99
|
9
|
|
|
|
|
26
|
push @sorted, $fname; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
else { |
102
|
11
|
|
|
|
|
13
|
$unsorted_size += $s; |
103
|
11
|
|
|
|
|
36
|
push @unsorted, $fname; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# get size thresholds |
108
|
11
|
|
|
|
|
27
|
my $threshold_super_small = $self->super_small_size_threshold(); |
109
|
11
|
100
|
|
|
|
27
|
$threshold_super_small = SUPER_SMALL_DEFAULT if not defined $threshold_super_small; |
110
|
11
|
|
|
|
|
26
|
my $threshold_small = $self->small_size_threshold(); |
111
|
11
|
100
|
|
|
|
26
|
$threshold_small = SMALL_DEFAULT if not defined $threshold_small; |
112
|
11
|
|
|
|
|
12
|
warn "Thresholds: tiny=$threshold_super_small small=$threshold_small" if DEBUG > 1; |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# storage of temporary files and guard to clean them up on scope exit |
115
|
11
|
|
|
|
|
15
|
my @tmpfiles; |
116
|
11
|
|
|
|
|
128
|
my $guard = Parse::ExuberantCTags::Merge::SimpleScopeGuard->new(files => \@tmpfiles); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# select sort strategy |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# everything small, sort all in memory regardless |
121
|
11
|
100
|
|
|
|
26
|
if ($total_size < $threshold_super_small) { |
122
|
5
|
|
|
|
|
8
|
warn "Total size < super-small-threshold => memory sort" if DEBUG; |
123
|
5
|
50
|
|
|
|
305
|
open my $ofh, '>', $outfile |
124
|
|
|
|
|
|
|
or die "Could not open output file '$outfile' for writing: $!"; |
125
|
5
|
|
|
|
|
18
|
return $self->_memory_sort($ofh, @sorted, @unsorted); |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# This must handle the unsorted files |
129
|
6
|
100
|
|
|
|
16
|
if (@unsorted) { |
130
|
4
|
|
|
|
|
6
|
warn "There are unsorted files..." if DEBUG; |
131
|
4
|
100
|
|
|
|
15
|
if ($unsorted_size < $threshold_small) { |
|
|
100
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# unsorted files are small and will be sorted in memory |
133
|
2
|
|
|
|
|
3
|
warn "Unsorted files small => memory sort" if DEBUG; |
134
|
2
|
|
|
|
|
2
|
my ($tfh, $tmpfile); |
135
|
2
|
100
|
|
|
|
5
|
if (@sorted) { # if there are sorted files (must be largish), use a tempfile |
136
|
1
|
|
|
|
|
5
|
($tfh, $tmpfile) = File::Temp::tempfile( |
137
|
|
|
|
|
|
|
"ctagsSortXXXXXX", UNLINK => 0, DIR => $tmpdir |
138
|
|
|
|
|
|
|
); |
139
|
1
|
|
|
|
|
365
|
push @tmpfiles, $tmpfile; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
else { # unsorted only => use real output file |
142
|
1
|
50
|
|
|
|
50
|
open $tfh, '>', $outfile |
143
|
|
|
|
|
|
|
or die "Could not open output file '$outfile' for writing: $!"; |
144
|
|
|
|
|
|
|
} |
145
|
2
|
|
|
|
|
8
|
$self->_memory_sort($tfh, @unsorted); |
146
|
2
|
|
|
|
|
71
|
close $tfh; |
147
|
2
|
100
|
|
|
|
7
|
if (not @sorted) { # only unsorted data => done! |
148
|
1
|
|
|
|
|
5
|
return 1; |
149
|
|
|
|
|
|
|
} |
150
|
1
|
|
|
|
|
3
|
push @sorted, $tmpfile; |
151
|
1
|
|
|
|
|
18
|
$sorted_size += -s $tmpfile; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
elsif ($sorted_size < $threshold_small) { |
154
|
|
|
|
|
|
|
# handle everything with Sort::External |
155
|
|
|
|
|
|
|
# don't bother with merge-sorting the small sorted files |
156
|
1
|
|
|
|
|
2
|
warn "Sorted files small or not existant => external sort for all" if DEBUG; |
157
|
1
|
50
|
|
|
|
46
|
open my $ofh, '>', $outfile |
158
|
|
|
|
|
|
|
or die "Could not open output file '$outfile' for writing: $!"; |
159
|
1
|
|
|
|
|
75
|
return $self->_external_sort($ofh, @unsorted, @sorted); |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
else { |
162
|
|
|
|
|
|
|
# both are large. First do an external sort on the unsorted files, |
163
|
|
|
|
|
|
|
# then do a merge sort |
164
|
1
|
|
|
|
|
3
|
warn "potentially large files => external sort for unsorted files" if DEBUG; |
165
|
1
|
|
|
|
|
6
|
my ($tfh, $tmpfile) = File::Temp::tempfile( |
166
|
|
|
|
|
|
|
"ctagsSortXXXXXX", UNLINK => 0, DIR => $tmpdir |
167
|
|
|
|
|
|
|
); |
168
|
1
|
|
|
|
|
383
|
push @tmpfiles, $tmpfile; |
169
|
1
|
|
|
|
|
5
|
$self->_external_sort($tfh, @unsorted); |
170
|
1
|
|
|
|
|
300
|
close $tfh; |
171
|
1
|
|
|
|
|
3
|
push @sorted, $tmpfile; |
172
|
1
|
|
|
|
|
20
|
$sorted_size += -s $tmpfile; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
} # end if there is unsorted data |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# at this point, there should be only sorted files |
177
|
|
|
|
|
|
|
# left => merge sort |
178
|
4
|
|
|
|
|
7
|
warn "running merge sort" if DEBUG; |
179
|
4
|
50
|
|
|
|
343
|
open my $ofh, '>', $outfile |
180
|
|
|
|
|
|
|
or die "Could not open output file '$outfile' for writing: $!"; |
181
|
|
|
|
|
|
|
|
182
|
4
|
|
|
|
|
14
|
return $self->_merge_sort($ofh, @sorted); |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub _merge_sort { |
187
|
4
|
|
|
4
|
|
6
|
warn "running _merge_sort" if DEBUG; |
188
|
4
|
|
|
|
|
7
|
my $self = shift; |
189
|
4
|
|
|
|
|
4
|
my $ofh = shift; |
190
|
4
|
|
|
|
|
11
|
my @infiles = @_; |
191
|
|
|
|
|
|
|
|
192
|
4
|
|
|
|
|
20
|
print $ofh "!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted/\n"; |
193
|
|
|
|
|
|
|
|
194
|
4
|
|
|
|
|
16
|
local $/ = "\n"; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# get the first lines and create a list of simple structs for sorting |
197
|
8
|
50
|
|
|
|
242
|
my @files = |
198
|
|
|
|
|
|
|
map { |
199
|
4
|
|
|
|
|
8
|
open my $fh, '<', $_ or die "Can't open input file '$_' for reading: $!"; |
200
|
8
|
|
|
|
|
138
|
my $first = <$fh>; |
201
|
8
|
100
|
|
|
|
28
|
$first = <$fh> if $first =~ /^!_TAG_FILE_SORTED\t/; # skip magic line |
202
|
8
|
|
|
|
|
31
|
[$first, $fh] |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
@infiles; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# initial sort of the first lines |
207
|
4
|
|
|
|
|
20
|
@files = sort {$a->[MRG_LINE] cmp $b->[MRG_LINE]} @files; |
|
4
|
|
|
|
|
13
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# keep sorting until all sources run out |
210
|
4
|
|
|
|
|
9
|
while (@files) { |
211
|
|
|
|
|
|
|
# first file in the list always has the next "lowest" line |
212
|
24
|
|
|
|
|
25
|
my $next = $files[0]; |
213
|
24
|
|
|
|
|
36
|
print $ofh $next->[MRG_LINE]; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# fetch a new line for this file handle |
216
|
24
|
|
|
|
|
30
|
my $fh = $next->[MRG_FH]; |
217
|
24
|
|
|
|
|
67
|
$next->[MRG_LINE] = <$fh>; |
218
|
24
|
100
|
|
|
|
43
|
if (not defined $next->[MRG_LINE]) { |
219
|
|
|
|
|
|
|
# eof, lose the file |
220
|
8
|
|
|
|
|
13
|
splice(@files, 0, 1); |
221
|
8
|
|
|
|
|
141
|
next; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# one pass of bubble sort to propagate the new line to its place |
225
|
16
|
|
|
|
|
47
|
for (my $i = 1; $i < @files; ++$i) { |
226
|
8
|
50
|
|
|
|
20
|
if (($files[$i-1][MRG_LINE] cmp $files[$i][MRG_LINE]) == 1) { |
227
|
0
|
|
|
|
|
0
|
my $tmp = $files[$i-1]; |
228
|
0
|
|
|
|
|
0
|
$files[$i-1] = $files[$i]; |
229
|
0
|
|
|
|
|
0
|
$files[$i] = $tmp; |
230
|
|
|
|
|
|
|
} else { |
231
|
8
|
|
|
|
|
21
|
last; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
} # end while there are files |
235
|
|
|
|
|
|
|
|
236
|
4
|
|
|
|
|
278
|
return(1); |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub _external_sort { |
242
|
2
|
|
|
2
|
|
4
|
warn "running _external_sort" if DEBUG; |
243
|
2
|
|
|
|
|
4
|
my $self = shift; |
244
|
2
|
|
|
|
|
3
|
my $ofh = shift; |
245
|
2
|
|
|
|
|
6
|
my @infiles = @_; |
246
|
|
|
|
|
|
|
|
247
|
2
|
|
|
|
|
11
|
print $ofh "!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted/\n"; |
248
|
|
|
|
|
|
|
|
249
|
2
|
|
|
|
|
921
|
require Sort::External; |
250
|
2
|
|
|
|
|
3308
|
my $exsort = Sort::External->new( |
251
|
|
|
|
|
|
|
mem_threshold => 1024**2 * 32, # todo: configuration |
252
|
|
|
|
|
|
|
); |
253
|
|
|
|
|
|
|
|
254
|
2
|
|
|
|
|
1040
|
local $/ = "\n"; |
255
|
2
|
|
|
|
|
5
|
foreach my $infile (@infiles) { |
256
|
3
|
50
|
|
|
|
96
|
open my $fh, '<', $infile |
257
|
|
|
|
|
|
|
or die "Could not open input file '$infile' for reading: $!"; |
258
|
3
|
|
|
|
|
51
|
my $first_line = <$fh>; |
259
|
3
|
50
|
|
|
|
24
|
$exsort->feed($first_line) if $first_line !~ /^!_TAG_FILE_SORTED/; |
260
|
3
|
|
|
|
|
10
|
while (<$fh>) { |
261
|
6
|
|
|
|
|
41
|
$exsort->feed($_); |
262
|
|
|
|
|
|
|
} |
263
|
3
|
|
|
|
|
36
|
close $fh; |
264
|
|
|
|
|
|
|
} |
265
|
2
|
|
|
|
|
10
|
$exsort->finish(); |
266
|
2
|
|
|
|
|
47
|
while (defined($_ = $exsort->fetch)) { |
267
|
9
|
|
|
|
|
35
|
print $ofh $_; |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
2
|
|
|
|
|
44
|
return(1); |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub _memory_sort { |
275
|
7
|
|
|
7
|
|
8
|
warn "running _memory_sort" if DEBUG; |
276
|
7
|
|
|
|
|
11
|
my $self = shift; |
277
|
7
|
|
|
|
|
9
|
my $ofh = shift; |
278
|
7
|
|
|
|
|
18
|
my @infiles = @_; |
279
|
|
|
|
|
|
|
|
280
|
7
|
|
|
|
|
80
|
local $/ = "\n"; |
281
|
7
|
|
|
|
|
10
|
my @records; |
282
|
7
|
|
|
|
|
13
|
foreach my $infile (@infiles) { |
283
|
11
|
50
|
|
|
|
353
|
open my $fh, '<', $infile |
284
|
|
|
|
|
|
|
or die "Could not open input file '$infile' for reading: $!"; |
285
|
11
|
|
|
|
|
144
|
my $first_line = <$fh>; |
286
|
11
|
50
|
|
|
|
79
|
push @records, $first_line if $first_line !~ /^!_TAG_FILE_SORTED/; |
287
|
11
|
|
|
|
|
88
|
push @records, <$fh>; |
288
|
11
|
|
|
|
|
148
|
close $fh; |
289
|
|
|
|
|
|
|
} |
290
|
7
|
|
|
|
|
33
|
@records = sort @records; # check fast inplace sort |
291
|
|
|
|
|
|
|
|
292
|
7
|
|
|
|
|
81
|
print $ofh "!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted/\n"; |
293
|
7
|
|
|
|
|
15
|
print $ofh @records; |
294
|
|
|
|
|
|
|
|
295
|
7
|
|
|
|
|
320
|
return(1); |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
1; |
299
|
|
|
|
|
|
|
__END__ |