| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
2
|
|
|
2
|
|
142859
|
use 5.006; |
|
|
2
|
|
|
|
|
9
|
|
|
|
2
|
|
|
|
|
83
|
|
|
2
|
2
|
|
|
2
|
|
11
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
159
|
|
|
3
|
2
|
|
|
2
|
|
13
|
use warnings; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
107
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package File::RandomLine; |
|
6
|
|
|
|
|
|
|
# ABSTRACT: Retrieve random lines from a file |
|
7
|
|
|
|
|
|
|
our $VERSION = '0.20'; # VERSION |
|
8
|
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
10
|
use Carp; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
151
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Required modules |
|
12
|
2
|
|
|
2
|
|
2213
|
use Want 'howmany'; |
|
|
2
|
|
|
|
|
5645
|
|
|
|
2
|
|
|
|
|
20544
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# new( $filename, \%args )
#
# Constructor.  Opens $filename for reading and returns a blessed hashref
# holding the open filehandle, the file size in bytes and, for the uniform
# algorithm, an index of line start offsets plus the line count.
#
# Parameters:
#   $class    - class name, or an existing object whose class is reused
#   $filename - path of the file to draw lines from (required)
#   $args     - optional hashref; the "algorithm" key may be "fast" or
#               "uniform" (matched case-insensitively).  Any false value
#               leaves line_index undefined, which next() treats as "fast".
#
# Croaks when the filename is missing, when the algorithm name is not
# exactly one of the known names, or when the file cannot be opened.
sub new {
    my ($class, $filename, $args) = @_;

    # Test definedness/length rather than truth so a file named "0" works.
    croak "new requires a filename parameter"
        unless defined $filename && length $filename;

    my $algo = $args->{algorithm} || q{};

    # Anchor the match: an unanchored /fast|uniform/ would accept any string
    # that merely contains one of the names (e.g. "superfast").
    croak "unknown algorithm '$algo'"
        if $algo && $algo !~ /\A(?:fast|uniform)\z/i;

    # Include $! so the caller can see why the open failed.
    open(my $fh, "<", $filename) or croak "Can't read $filename: $!";

    # Only the uniform algorithm needs the per-line offset index.
    my $line_index = lc($algo) eq 'uniform' ? _index_file($fh) : undef;
    my $filesize   = -s $fh;

    my $self = {
        fh         => $fh,
        line_index => $line_index,
        line_count => $line_index ? scalar @$line_index : undef,
        filesize   => $filesize,
    };

    # Allow both Class->new(...) and $object->new(...).
    return bless( $self, ref($class) ? ref($class) : $class );
}
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
34
|
|
|
|
|
|
|
# _index_file |
|
35
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# _index_file( $fh )
#
# Walks the filehandle from its current position to end-of-file and records
# the byte offset (as reported by tell) at which each line begins.
# Returns a reference to the array of offsets; the array is empty when the
# handle is already at EOF.  The handle is left positioned at EOF.
sub _index_file {
    my ($fh) = @_;
    my @offsets;
    until (eof $fh) {
        my $line_start = tell $fh;
        push @offsets, $line_start;
        <$fh>;    # consume one line so tell() advances to the next start
    }
    return \@offsets;
}
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
48
|
|
|
|
|
|
|
# next() |
|
49
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# next( [$n] )
#
# Returns random lines from the file, with line endings removed by chomp.
#
# Parameters:
#   $n - optional number of lines to return; must consist only of digits.
#        When omitted in list context the count is taken from howmany()
#        (imported from Want), falling back to 1.  A count of 0 triggers a
#        warning and is then treated as 1.
#
# Returns the list of sampled lines in list context, or the first sampled
# line in scalar context.  Lines come from _uniform() when the object has a
# line index and from _fast() otherwise.
#
# Croaks when $n is defined but is not a string of digits.
sub next {
    my ($self, $n) = @_;

    # behavior copied from File::Random
    if (!defined($n) and wantarray) {
        $n = howmany();
        $n ||= 1;
    }

    # \A...\z instead of ^...$ so that a value such as "5\n" is rejected;
    # "$" would otherwise match before a trailing newline.
    if (defined($n) and $n !~ /\A\d+\z/) {
        croak "Number of random_lines should be a positive integer, not '$n'";
    }

    carp "Strange call to File::RandomLine->next(): 0 random lines requested"
        if defined($n) and $n == 0;
    $n ||= 1;

    my @sample;
    while (@sample < $n) {
        push @sample, $self->{line_index} ? $self->_uniform : $self->_fast;
    }
    chomp @sample;
    return wantarray ? @sample : shift @sample;
}
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
75
|
|
|
|
|
|
|
# Fast Algorithm |
|
76
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# _fast()
#
# Fast sampling: seek to a random byte offset below the stored file size,
# throw away whatever remains of the line at that offset, and return the
# line that follows (unchomped).  If discarding the fragment reached
# end-of-file, rewind so that the first line of the file is returned.
sub _fast {
    my ($self) = @_;
    my $handle = $self->{fh};

    my $offset = int( rand( $self->{filesize} ) );
    seek( $handle, $offset, 0 );

    <$handle>;    # skip this fragment of a line

    # wrap if hit EOF
    if ( eof $handle ) {
        seek( $handle, 0, 0 );
    }

    return scalar <$handle>;    # get the next line
}
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
88
|
|
|
|
|
|
|
# Uniform Algorithm |
|
89
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# _uniform()
#
# Uniform sampling: choose a random entry from the stored line index, seek
# the filehandle to that byte offset and return the line read from there
# (unchomped).
sub _uniform {
    my ($self) = @_;
    my ($handle, $offsets, $total) = @{$self}{qw(fh line_index line_count)};

    my $pick = int( rand($total) );
    seek( $handle, $offsets->[$pick], 0 );

    return scalar <$handle>;    # get the next line
}
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
1; |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
__END__ |