| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package List::MergeSorted::XS; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
22163
|
use 5.008; |
|
|
1
|
|
|
|
|
5
|
|
|
|
1
|
|
|
|
|
45
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
35
|
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
7
|
|
|
|
1
|
|
|
|
|
32
|
|
|
6
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
127
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
require Exporter; |
|
9
|
1
|
|
|
1
|
|
1791
|
use AutoLoader; |
|
|
1
|
|
|
|
|
1759
|
|
|
|
1
|
|
|
|
|
6
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our @EXPORT_OK = qw(merge); |
|
14
|
|
|
|
|
|
|
our @EXPORT = qw(); |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '1.06'; |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
require XSLoader; |
|
19
|
|
|
|
|
|
|
XSLoader::load('List::MergeSorted::XS', $VERSION); |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use constant { |
|
22
|
1
|
|
|
|
|
1178
|
PRIO_LINEAR => 0, |
|
23
|
|
|
|
|
|
|
PRIO_FIB => 1, |
|
24
|
|
|
|
|
|
|
SORT => 2, |
|
25
|
1
|
|
|
1
|
|
108
|
}; |
|
|
1
|
|
|
|
|
1
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub merge { |
|
28
|
129
|
|
|
129
|
1
|
242536
|
my $lists = shift; |
|
29
|
129
|
|
|
|
|
364
|
my %opts = @_; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# validate inputs |
|
32
|
129
|
100
|
100
|
|
|
1008
|
unless ($lists && ref $lists && ref $lists eq 'ARRAY') { |
|
|
|
|
66
|
|
|
|
|
|
33
|
3
|
|
|
|
|
20
|
die "merge requires an array reference"; |
|
34
|
|
|
|
|
|
|
} |
|
35
|
126
|
|
|
|
|
200
|
for my $list (@$lists) { |
|
36
|
549
|
100
|
66
|
|
|
3156
|
unless ($list && ref $list && ref $list eq 'ARRAY') { |
|
|
|
|
66
|
|
|
|
|
|
37
|
1
|
|
|
|
|
14
|
die "lists to merge must be arrayrefs"; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
} |
|
40
|
|
|
|
|
|
|
|
|
41
|
125
|
|
100
|
|
|
471
|
my $limit = $opts{limit} || 0; |
|
42
|
125
|
50
|
33
|
|
|
464
|
die "limit must be positive" if defined $limit && $limit < 0; |
|
43
|
|
|
|
|
|
|
|
|
44
|
125
|
50
|
66
|
|
|
435
|
die "key_cb option must be a coderef" |
|
45
|
|
|
|
|
|
|
if defined $opts{key_cb} && ref $opts{key_cb} ne 'CODE'; |
|
46
|
|
|
|
|
|
|
|
|
47
|
125
|
50
|
66
|
|
|
337
|
die "uniq_cb option must be a coderef" |
|
48
|
|
|
|
|
|
|
if defined $opts{uniq_cb} && ref $opts{uniq_cb} ne 'CODE'; |
|
49
|
|
|
|
|
|
|
|
|
50
|
125
|
100
|
|
|
|
257
|
return [] unless @$lists; |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# pick an algorithm |
|
53
|
124
|
|
|
|
|
302
|
my @params = ($lists, $limit, $opts{key_cb}, $opts{uniq_cb}); |
|
54
|
|
|
|
|
|
|
|
|
55
|
124
|
100
|
|
|
|
256
|
if (defined $opts{method}) { |
|
56
|
87
|
|
|
|
|
219
|
return _merge($opts{method}, @params); |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
37
|
100
|
|
|
|
71
|
if (defined $opts{key_cb}) { |
|
60
|
|
|
|
|
|
|
# linear priority queue is faster until ~100 lists, relatively |
|
61
|
|
|
|
|
|
|
# independent of limit %. sort never wins in keyed mode because of |
|
62
|
|
|
|
|
|
|
# Schwartzian tx overhead |
|
63
|
|
|
|
|
|
|
|
|
64
|
18
|
50
|
|
|
|
54
|
return scalar @$lists < 100 |
|
65
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
|
66
|
|
|
|
|
|
|
: _merge(PRIO_FIB, @params); |
|
67
|
|
|
|
|
|
|
} |
|
68
|
|
|
|
|
|
|
else { |
|
69
|
|
|
|
|
|
|
# linear always wins with a small number of lists (<100). with more |
|
70
|
|
|
|
|
|
|
# lists, fib wins with low limit, giving way to sort around 25% |
|
71
|
|
|
|
|
|
|
# limit. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# compute what fraction of the merged set will be returned |
|
74
|
19
|
|
|
|
|
56
|
my $total = _count_elements($lists); |
|
75
|
19
|
|
100
|
|
|
51
|
$limit ||= $total; |
|
76
|
|
|
|
|
|
|
|
|
77
|
19
|
50
|
|
|
|
72
|
if ($limit < 0.05 * $total) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
78
|
0
|
0
|
|
|
|
0
|
return scalar @$lists < 1000 |
|
79
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
|
80
|
|
|
|
|
|
|
: _merge(PRIO_FIB, @params); |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
elsif ($limit < 0.25 * $total) { |
|
83
|
2
|
50
|
|
|
|
8
|
return scalar @$lists < 500 |
|
84
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
|
85
|
|
|
|
|
|
|
: _merge(PRIO_FIB, @params) |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
elsif ($limit < 0.75 * $total) { |
|
88
|
4
|
50
|
|
|
|
13
|
return scalar @$lists < 100 |
|
89
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
|
90
|
|
|
|
|
|
|
: _merge(SORT, @params) |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
else { |
|
93
|
13
|
50
|
|
|
|
39
|
return scalar @$lists < 100 |
|
94
|
|
|
|
|
|
|
? _merge(PRIO_LINEAR, @params) |
|
95
|
|
|
|
|
|
|
: _merge(SORT, @params) |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# dispatch to appopriate implementation based on algorithm and options |
|
101
|
|
|
|
|
|
|
sub _merge { |
|
102
|
124
|
|
|
124
|
|
206
|
my ($method, $lists, $limit, $key_cb, $uniq_cb) = @_; |
|
103
|
|
|
|
|
|
|
|
|
104
|
124
|
100
|
|
|
|
316
|
if ($method == PRIO_LINEAR) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
105
|
66
|
100
|
|
|
|
964
|
return $key_cb ? $uniq_cb ? _merge_linear_keyed_dedupe($lists, $limit, $key_cb, $uniq_cb) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
: _merge_linear_keyed_dupeok($lists, $limit, $key_cb) |
|
107
|
|
|
|
|
|
|
: $uniq_cb ? _merge_linear_flat_dedupe($lists, $limit, $uniq_cb) |
|
108
|
|
|
|
|
|
|
: _merge_linear_flat_dupeok($lists, $limit); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
elsif ($method == PRIO_FIB) { |
|
111
|
29
|
100
|
|
|
|
2142
|
return $key_cb ? $uniq_cb ? _merge_fib_keyed_dedupe($lists, $limit, $key_cb, $uniq_cb) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
: _merge_fib_keyed_dupeok($lists, $limit, $key_cb) |
|
113
|
|
|
|
|
|
|
: $uniq_cb ? _merge_fib_flat_dedupe($lists, $limit, $uniq_cb) |
|
114
|
|
|
|
|
|
|
: _merge_fib_flat_dupeok($lists, $limit); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
elsif ($method == SORT) { |
|
117
|
29
|
100
|
|
|
|
129
|
return $key_cb ? $uniq_cb ? _merge_sort_keyed_dedupe($lists, $limit, $key_cb, $uniq_cb) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
: _merge_sort_keyed_dupeok($lists, $limit, $key_cb) |
|
119
|
|
|
|
|
|
|
: $uniq_cb ? _merge_sort_flat_dedupe($lists, $limit, $uniq_cb) |
|
120
|
|
|
|
|
|
|
: _merge_sort_flat_dupeok($lists, $limit); |
|
121
|
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
else { |
|
123
|
0
|
|
|
|
|
0
|
die "unknown sort method $method requested\n"; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# concatenate all lists and sort the whole thing. works well when no limit is |
|
128
|
|
|
|
|
|
|
# given. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub _merge_sort_flat_dupeok { |
|
131
|
12
|
|
|
12
|
|
562
|
my ($lists, $limit) = @_; |
|
132
|
|
|
|
|
|
|
|
|
133
|
12
|
|
|
|
|
23
|
my @output = sort {$a <=> $b} map {@$_} @$lists; |
|
|
11580
|
|
|
|
|
9430
|
|
|
|
63
|
|
|
|
|
322
|
|
|
134
|
12
|
100
|
66
|
|
|
153
|
splice @output, $limit if $limit && @output > $limit; |
|
135
|
12
|
|
|
|
|
85
|
return \@output; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub _merge_sort_keyed_dupeok { |
|
139
|
12
|
|
|
12
|
|
22
|
my ($lists, $limit, $keyer) = @_; |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Schwartzian transform is faster than sorting on |
|
142
|
|
|
|
|
|
|
# {$keyer->($a) <=> # $keyer->($b)}, even for degenerately simple case |
|
143
|
|
|
|
|
|
|
# of $keyer = sub { $_[0] } |
|
144
|
|
|
|
|
|
|
|
|
145
|
3113
|
|
|
|
|
4406
|
my @output = |
|
146
|
11548
|
|
|
|
|
11446
|
map { $_->[1] } |
|
147
|
3113
|
|
|
|
|
10946
|
sort { $a->[0] <=> $b->[0] } |
|
148
|
61
|
|
|
|
|
346
|
map { [$keyer->($_), $_] } |
|
149
|
12
|
|
|
|
|
39
|
map { @$_ } |
|
150
|
|
|
|
|
|
|
@$lists; |
|
151
|
|
|
|
|
|
|
|
|
152
|
12
|
100
|
66
|
|
|
649
|
splice @output, $limit if $limit && @output > $limit; |
|
153
|
12
|
|
|
|
|
94
|
return \@output; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub _merge_sort_flat_dedupe { |
|
157
|
1
|
|
|
1
|
|
2
|
my ($lists, $limit, $uniquer) = @_; |
|
158
|
|
|
|
|
|
|
|
|
159
|
1
|
|
|
|
|
2
|
my @merged = sort {$a <=> $b} map {@$_} @$lists; |
|
|
12
|
|
|
|
|
14
|
|
|
|
3
|
|
|
|
|
10
|
|
|
160
|
|
|
|
|
|
|
|
|
161
|
1
|
|
|
|
|
2
|
my @output; |
|
162
|
1
|
|
|
|
|
1
|
my $last_unique = undef; |
|
163
|
1
|
|
|
|
|
3
|
for my $element (@merged) { |
|
164
|
7
|
|
|
|
|
13
|
my $unique = $uniquer->($element); |
|
165
|
7
|
100
|
100
|
|
|
34
|
next if defined $last_unique && $unique == $last_unique; |
|
166
|
5
|
|
|
|
|
5
|
push @output, $element; |
|
167
|
5
|
|
|
|
|
8
|
$last_unique = $unique; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
1
|
50
|
33
|
|
|
6
|
splice @output, $limit if $limit && @output > $limit; |
|
170
|
1
|
|
|
|
|
6
|
return \@output; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub _merge_sort_keyed_dedupe { |
|
174
|
4
|
|
|
4
|
|
7
|
my ($lists, $limit, $keyer, $uniquer) = @_; |
|
175
|
|
|
|
|
|
|
|
|
176
|
16
|
|
|
|
|
26
|
my @merged = |
|
177
|
17
|
|
|
|
|
44
|
map { $_->[1] } |
|
178
|
16
|
|
|
|
|
62
|
sort { $a->[0] <=> $b->[0] } |
|
179
|
7
|
|
|
|
|
16
|
map { [$keyer->($_), $_] } |
|
180
|
4
|
|
|
|
|
8
|
map { @$_ } |
|
181
|
|
|
|
|
|
|
@$lists; |
|
182
|
|
|
|
|
|
|
|
|
183
|
4
|
|
|
|
|
13
|
my @output; |
|
184
|
|
|
|
|
|
|
my %seen; |
|
185
|
4
|
|
|
|
|
5
|
for my $element (@merged) { |
|
186
|
16
|
|
|
|
|
45
|
my $unique = $uniquer->($element); |
|
187
|
16
|
100
|
|
|
|
77
|
next if $seen{$unique}++; |
|
188
|
11
|
|
|
|
|
22
|
push @output, $element; |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
4
|
50
|
33
|
|
|
15
|
splice @output, $limit if $limit && @output > $limit; |
|
192
|
4
|
|
|
|
|
25
|
return \@output; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
1; |
|
196
|
|
|
|
|
|
|
__END__ |