| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package MyCPAN::Indexer::Worker; |
|
2
|
1
|
|
|
1
|
|
1447
|
use strict; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
35
|
|
|
3
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
31
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
6
|
use parent qw(MyCPAN::Indexer::Component); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
7
|
|
|
6
|
1
|
|
|
1
|
|
71
|
use vars qw($VERSION $logger); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
73
|
|
|
7
|
|
|
|
|
|
|
$VERSION = '1.28_12'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use Cwd; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
82
|
|
|
10
|
1
|
|
|
1
|
|
5
|
use File::Basename; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
76
|
|
|
11
|
1
|
|
|
1
|
|
5
|
use File::Spec::Functions qw(catfile); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
42
|
|
|
12
|
1
|
|
|
1
|
|
6
|
use Log::Log4perl; |
|
|
1
|
|
|
|
|
45
|
|
|
|
1
|
|
|
|
|
8
|
|
|
13
|
1
|
|
|
1
|
|
83
|
use MyCPAN::Indexer; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use Proc::ProcessTable; |
|
15
|
|
|
|
|
|
|
use YAML; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 NAME |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
MyCPAN::Indexer::Worker - Do the indexing |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
Use this in backpan_indexer.pl by specifying it as the worker class: |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# in backpan_indexer.config |
|
26
|
|
|
|
|
|
|
worker_class MyCPAN::Indexer::Worker |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This class takes a distribution and analyses it. This is what the dispatcher |
|
31
|
|
|
|
|
|
|
hands a distribution to for the actual indexing. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 Methods |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=over 4 |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=item get_task |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
C<get_task> sets the C<child_task> key in the notes. The |
|
40
|
|
|
|
|
|
|
value is a code reference that takes a distribution path as its only |
|
41
|
|
|
|
|
|
|
argument and indexes that distribution. |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
See L<MyCPAN::Indexer::Tutorial> for details about what C<get_task> expects |
|
44
|
|
|
|
|
|
|
and should do. |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Fetch the Log4perl logger for the 'Worker' category at compile time so
# that the package-level $logger is populated before any sub here runs.
BEGIN
{
	$logger = Log::Log4perl->get_logger( 'Worker' );
}
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=item component_type |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
This is a worker component. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# component_type( $self )
#
# Reports what kind of component this is by delegating to the invocant's
# worker_type method and returning whatever that returns.
sub component_type
{
	my( $self ) = @_;

	return $self->worker_type;
}
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item get_task |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=cut |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# get_task( $self )
#
# Builds the code reference that indexes one distribution and stores it in
# the coordinator's notes under the 'child_task' key. Returns 1.
#
# The stored code reference takes a distribution path as its only argument
# and returns an object blessed into the indexer class:
#
#   * a stub with skipped => 1 when the reporter's
#     check_for_previous_successful_result comes back false;
#   * a stub with skip_error => 1 when an error report exists and the
#     retry_errors setting is off;
#   * otherwise the result of running the indexer, or a substitute object
#     carrying run_info completed => 0 and the error text when indexing
#     produced nothing.
#
# While indexing, the closure changes into the configured temp_dir (and
# exits with status 255 if it cannot), arms an alarm, and afterwards kills
# leftover child processes via _cleanup_children.
sub get_task
{
	my( $self ) = @_;

	my $config      = $self->get_config;
	my $coordinator = $self->get_coordinator;
	my $indexer     = $coordinator->get_component( 'indexer' );

	$logger->debug( "Worker class is " . __PACKAGE__ );
	$logger->debug( "Indexer class is " . $indexer->class );

	my $child_task = sub {
		my $dist = shift;

		my $dist_basename = basename( $dist );

		# A false value here is treated as "already reported successfully":
		# log it and hand back a stub marked as skipped.
		my $basename = $coordinator->get_reporter->check_for_previous_successful_result( $dist );
		unless( $basename )
		{
			$logger->debug( "Found successful report for $dist_basename" );
			return bless {
				dist_info => {
					dist_path     => $dist,
					dist_basename => $dist_basename
				},
				skipped => 1,
			}, $indexer->class;
		}

		my $previous_error_basename = $coordinator->get_reporter->check_for_previous_error_result( $dist ) || '';
		$logger->debug( "Error report returned [$previous_error_basename]" );
		$logger->debug( "Found error report for $dist_basename" ) if $previous_error_basename;

		# we used to handle this by just deleting all the old error
		# reports in setup_dirs over in MyCPAN::App::BackPAN::Indexer
		# deleting all the reports before we got started made it
		# impossible to get a list of error reports to retry
		if( $previous_error_basename and ! $config->retry_errors )
		{
			$logger->debug( "By config, skipping $dist because I'm not retrying errors" );
			return bless {
				dist_info => {
					dist_path     => $dist,
					dist_basename => $dist_basename,
				},
				skip_error => 1,
			}, $self->get_config->indexer_class;
		}
		elsif( $previous_error_basename and $config->retry_errors )
		{
			# if we are re-trying errors and there is already a report
			# unlink the previous report
			my $report_full_path = $coordinator->get_reporter->get_error_report_path( $dist );

			$logger->debug( "Trying to unlink $report_full_path" );
			my $rc = unlink $report_full_path;
			$logger->debug( ($rc ? 'unlinked ' : 'failed to unlink ') . $report_full_path );
		}

		$logger->info( "Starting Worker for $dist_basename\n" );

		my $starting_dir = cwd();

		unless( chdir $config->temp_dir )
		{
			$logger->error( "Could not change to " . $config->temp_dir . " : $!\n" );
			exit 255;
		}

		# Compute the timeout once so the log line reports the value that
		# is actually armed, including the 15 second fallback.
		my $alarm_seconds = $config->alarm || 15;
		$logger->debug( sprintf "Setting alarm for %d seconds", $alarm_seconds );
		local $SIG{ALRM} = sub {
			$logger->info( "Alarm rang for $dist_basename in process $$!\n" );
			$self->_cleanup_children;
			$logger->info( "Cleaned up" );
			die;
		};

		local $SIG{CHLD} = 'IGNORE';
		alarm( $alarm_seconds );
		$logger->debug( "Examining $dist_basename" );

		# $at holds the reason indexing produced nothing. It is filled in
		# right where the failure happens, because $@ is a global that any
		# later call may overwrite.
		my $at = '';

		my $info = do {
			if( ! -e $dist )
			{
				$at = "Dist $dist does not exist";
				$logger->warn( $at );
				undef;
			}
			elsif( ! -s $dist )
			{
				$logger->warn( "Dist $dist has zero size" );
				my $info = bless {}, $self->get_config->indexer_class;
				$info->setup_dist_info( $dist );
				$info->set_dist_info( 'unindexable', 'zero size' );
				$info->setup_run_info;
				$info->set_run_info( qw(completed 1) );
				$info;
			}
			else
			{
				$logger->warn( "Indexing $dist" );
				my $result = eval { $indexer->run( $dist ) };
				$at = $@;    # capture before anything else can clobber it
				$result;
			}
		};

		# Disarm the timer before doing anything else: the ALRM handler
		# dies, and past this point no eval would catch that.
		alarm 0;

		$logger->debug( "Done examining $dist_basename" );
		chomp $at;

		chdir $starting_dir
			or $logger->error( "Could not change back to $starting_dir : $!" );

		unless( defined $info )
		{
			$logger->error( "Indexing failed for $dist_basename: $at" );
			$info = bless {}, $self->get_config->indexer_class; # XXX TODO make this a real class
			$info->setup_dist_info( $dist );
			$info->setup_run_info;
			$info->set_run_info( qw(completed 0) );
			$info->set_run_info( error => $at );
		}
		elsif( ! eval { $info->run_info( 'completed' ) } )
		{
			$logger->error( "Indexing for $dist_basename did not complete" );
			$self->_copy_bad_dist( $info ) if $config->copy_bad_dists;
		}

		$self->_add_run_info( $info );

		# The 'reporter' note is a code reference; call it with the result.
		$coordinator->get_note('reporter')->( $info );

		$logger->debug( "Worker for $dist_basename done" );

		# some things hang anyway, so just to be careful we'll cleanup
		# everything here.
		$self->_cleanup_children;

		$logger->debug( "Cleaned up, returning..." );
		$info;
	};

	$coordinator->set_note( 'child_task', $child_task );

	1;
}
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# _cleanup_children()
#
# Sends signal 9 (KILL) to every process whose parent is the current
# process ($$) and to every process whose parent is one of those children.
# Deeper descendants are not looked for. Takes no arguments that it uses
# and always returns an empty list / undef.
#
# The process table is read once and both lookups work from that single
# snapshot, so the (expensive) table scan is not repeated and the two
# lists describe the same moment in time.
sub _cleanup_children
{
	$logger->warn( "Cleaning up after $$" );

	my @snapshot = @{ Proc::ProcessTable->new->table };

	# pid => 1 for each direct child of this process
	my %children =
		map  { $_->{pid}, 1 }
		grep { $_->{'ppid'} == $$ }
		@snapshot;
	$logger->debug( "Child processes are @{[keys %children]}" );

	# pids whose parent is one of the direct children found above
	my @grandchildren =
		map  { $_->{pid} }
		grep { exists $children{ $_->{'ppid'} } }
		@snapshot;
	$logger->debug( "Grandchild processes are @grandchildren" );

	my @processes = ( keys %children, @grandchildren );
	$logger->debug( "There are " . @processes . " processes to clean up" );
	return unless @processes;

	$logger->debug( "Preparing to kill" );

	kill 9, @processes;

	return;
}
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# _copy_bad_dist( $self, $info )
#
# Copies the distribution archive described by $info into the directory
# named by the copy_bad_dists configuration value, so that dists which
# failed to index can be inspected later.
#
#   $info - object answering dist_info( 'dist_file' ) with the path to the
#           archive and dist_info( 'dist_basename' ) with the name to give
#           the copy.
#
# Returns 1 when the copy exists afterwards (freshly made or already
# there). Returns empty when copy_bad_dists is not set, the directory
# cannot be created, or the copy fails; failures are logged.
sub _copy_bad_dist
{
	my( $self, $info ) = @_;

	my $config       = $self->get_config;
	my $bad_dist_dir = $config->copy_bad_dists;
	return unless $bad_dist_dir;

	# Use the directory if it is already there, otherwise try to make it.
	unless( -d $bad_dist_dir or mkdir $bad_dist_dir )
	{
		$logger->error( "Could not make dist dir [$bad_dist_dir]: $!" );
		return;
	}

	my $dist_file = $info->dist_info( 'dist_file' );
	my $basename  = $info->dist_info( 'dist_basename' );
	my $new_name  = catfile( $bad_dist_dir, $basename );

	# An earlier run already saved this one.
	return 1 if -e $new_name;

	$logger->debug( "Copying bad dist" );

	my( $in, $out );

	# Open the source first: opening the destination creates it, and an
	# empty leftover file would make the -e check above skip every later
	# attempt to copy this dist.
	unless( open $in, "<", $dist_file )
	{
		$logger->fatal( "Could not open bad dist to $dist_file: $!" );
		return;
	}

	unless( open $out, ">", $new_name )
	{
		$logger->fatal( "Could not copy bad dist to $new_name: $!" );
		close $in;
		return;
	}

	# The data is copied as raw bytes in fixed-size chunks, so no
	# line-ending translation is applied and nothing depends on where
	# newlines happen to fall.
	binmode $in;
	binmode $out;

	{
		local $/ = \65536;
		while( defined( my $chunk = <$in> ) )
		{
			print { $out } $chunk;
		}
	}

	close $in;

	# Buffered write errors show up at close time. Remove a partial copy
	# so that a later run can try again.
	unless( close $out )
	{
		$logger->fatal( "Could not finish copying bad dist to $new_name: $!" );
		unlink $new_name;
		return;
	}

	return 1;
}
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# _add_run_info( $self, $info )
#
# Records details of the current run on $info through its set_run_info
# method: every configuration directive with its value, the 'UUID' note
# (stored as 'uuid'), this process's id ('child_pid'), whatever the
# indexer class's getppid method returns ('parent_pid'), and a reference
# to %ENV ('ENV').
#
# Returns 1 after recording, or empty when $info has no set_run_info
# method (including when $info is not an object at all).
sub _add_run_info
{
	my( $self, $info ) = @_;

	my $config = $self->get_config;

	# can() dies on a non-object, hence the eval.
	my $can_record = eval { $info->can( 'set_run_info' ) };
	return unless $can_record;

	for my $directive ( $config->directives )
	{
		$info->set_run_info( $directive, $config->get( $directive ) );
	}

	$info->set_run_info( 'uuid', $self->get_note( 'UUID' ) );

	$info->set_run_info( 'child_pid', $$ );

	# Kept in list context: if the call dies, no value at all is passed.
	my @parent_pid = eval { $config->indexer_class->getppid };
	$info->set_run_info( 'parent_pid', @parent_pid );

	$info->set_run_info( 'ENV', \%ENV );

	return 1;
}
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=back |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
MyCPAN::Indexer, MyCPAN::Indexer::Tutorial |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=head1 SOURCE AVAILABILITY |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
This code is in Github: |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
git://github.com/briandfoy/mycpan-indexer.git |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head1 AUTHOR |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
brian d foy, C<< <bdfoy@cpan.org> >> |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Copyright (c) 2008-2013, brian d foy, All Rights Reserved. |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
You may redistribute this under the same terms as Perl itself. |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=cut |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
1; |