| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
3
|
|
|
3
|
|
71610
|
use strict; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
71
|
|
|
2
|
3
|
|
|
3
|
|
11
|
use warnings; |
|
|
3
|
|
|
|
|
1
|
|
|
|
3
|
|
|
|
|
86
|
|
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package HTTP::Async::Polite; |
|
5
|
3
|
|
|
3
|
|
9
|
use base 'HTTP::Async'; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
873
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.33'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
3
|
|
|
3
|
|
10
|
use Carp; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
124
|
|
|
10
|
3
|
|
|
3
|
|
10
|
use Data::Dumper; |
|
|
3
|
|
|
|
|
2
|
|
|
|
3
|
|
|
|
|
112
|
|
|
11
|
3
|
|
|
3
|
|
9
|
use Time::HiRes qw( time sleep ); |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
12
|
|
|
12
|
3
|
|
|
3
|
|
229
|
use URI; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
1150
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
HTTP::Async::Polite - politely process multiple HTTP requests |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
See L - the usage is unchanged. |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This L module allows you to have many requests going on at once. |
|
25
|
|
|
|
|
|
|
This can be very rude if you are fetching several pages from the same domain. |
|
26
|
|
|
|
|
|
|
This module add limits to the number of simultaneous requests to a given |
|
27
|
|
|
|
|
|
|
domain and adds an interval between the requests. |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
In all other ways it is identical in use to the original L. |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 NEW METHODS |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 send_interval |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
Getter and setter for the C - the time in seconds to leave |
|
36
|
|
|
|
|
|
|
between each request for a given domain. By default this is set to 5 seconds. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=cut |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub send_interval { |
|
41
|
6
|
|
|
6
|
1
|
12
|
my $self = shift; |
|
42
|
|
|
|
|
|
|
return scalar @_ |
|
43
|
6
|
100
|
|
|
|
34
|
? $self->_set_opt( 'send_interval', @_ ) |
|
44
|
|
|
|
|
|
|
: $self->_get_opt('send_interval'); |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 OVERLOADED METHODS |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
These methods are overloaded but otherwise work exactly as the original |
|
50
|
|
|
|
|
|
|
methods did. The docs here just describe what they do differently. |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 new |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Sets the C value to the default of 5 seconds. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub new { |
|
59
|
6
|
|
|
6
|
1
|
35
|
my $class = shift; |
|
60
|
|
|
|
|
|
|
|
|
61
|
6
|
|
|
|
|
31
|
my $self = $class->SUPER::new; |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Set the interval between sends. |
|
64
|
6
|
|
|
|
|
16
|
$self->{opts}{send_interval} = 5; # seconds |
|
65
|
6
|
|
|
|
|
18
|
$class->_add_get_set_key('send_interval'); |
|
66
|
|
|
|
|
|
|
|
|
67
|
6
|
|
|
|
|
11
|
$self->_init(@_); |
|
68
|
|
|
|
|
|
|
|
|
69
|
6
|
|
|
|
|
7
|
return $self; |
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head2 add_with_opts |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Adds the request to the correct queue depending on the domain. |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub add_with_opts { |
|
79
|
7
|
|
|
7
|
1
|
6
|
my $self = shift; |
|
80
|
7
|
|
|
|
|
5
|
my $req = shift; |
|
81
|
7
|
|
|
|
|
6
|
my $opts = shift; |
|
82
|
7
|
|
|
|
|
26
|
my $id = $self->_next_id; |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Instead of putting this request and opts directly onto the to_send array |
|
85
|
|
|
|
|
|
|
# instead get the domain and add it to the domain's queue. Store this |
|
86
|
|
|
|
|
|
|
# domain with the opts so that it is easy to get at. |
|
87
|
7
|
|
|
|
|
18
|
my $uri = URI->new( $req->uri ); |
|
88
|
7
|
|
|
|
|
378
|
my $host = $uri->host; |
|
89
|
7
|
|
|
|
|
148
|
my $port = $uri->port; |
|
90
|
7
|
|
|
|
|
112
|
my $domain = "$host:$port"; |
|
91
|
7
|
|
|
|
|
20
|
$opts->{_domain} = $domain; |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Get the domain array - create it if needed. |
|
94
|
7
|
|
100
|
|
|
42
|
my $domain_arrayref = $self->{domain_stats}{$domain}{to_send} ||= []; |
|
95
|
|
|
|
|
|
|
|
|
96
|
7
|
|
|
|
|
9
|
push @{$domain_arrayref}, [ $req, $id ]; |
|
|
7
|
|
|
|
|
12
|
|
|
97
|
7
|
|
|
|
|
23
|
$self->{id_opts}{$id} = $opts; |
|
98
|
|
|
|
|
|
|
|
|
99
|
7
|
|
|
|
|
19
|
$self->poke; |
|
100
|
|
|
|
|
|
|
|
|
101
|
7
|
|
|
|
|
20
|
return $id; |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head2 to_send_count |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Returns the number of requests waiting to be sent. This is the number in the |
|
107
|
|
|
|
|
|
|
actual queue plus the number in each domain specific queue. |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=cut |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub to_send_count { |
|
112
|
200
|
|
|
200
|
1
|
389
|
my $self = shift; |
|
113
|
200
|
|
|
|
|
923
|
$self->poke; |
|
114
|
|
|
|
|
|
|
|
|
115
|
200
|
|
|
|
|
183
|
my $count = scalar @{ $$self{to_send} }; |
|
|
200
|
|
|
|
|
383
|
|
|
116
|
|
|
|
|
|
|
|
|
117
|
390
|
|
|
|
|
577
|
$count += scalar @{ $self->{domain_stats}{$_}{to_send} } |
|
118
|
200
|
|
|
|
|
217
|
for keys %{ $self->{domain_stats} }; |
|
|
200
|
|
|
|
|
566
|
|
|
119
|
|
|
|
|
|
|
|
|
120
|
200
|
|
|
|
|
781
|
return $count; |
|
121
|
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub _process_to_send { |
|
124
|
605
|
|
|
605
|
|
517
|
my $self = shift; |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Go through the domain specific queues and add all requests that we can |
|
127
|
|
|
|
|
|
|
# to the real queue. |
|
128
|
605
|
|
|
|
|
418
|
foreach my $domain ( keys %{ $self->{domain_stats} } ) { |
|
|
605
|
|
|
|
|
1441
|
|
|
129
|
|
|
|
|
|
|
|
|
130
|
1177
|
|
|
|
|
1037
|
my $domain_stats = $self->{domain_stats}{$domain}; |
|
131
|
1177
|
100
|
|
|
|
718
|
next unless scalar @{ $domain_stats->{to_send} }; |
|
|
1177
|
|
|
|
|
2020
|
|
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# warn "TRYING TO ADD REQUEST FOR $domain"; |
|
134
|
|
|
|
|
|
|
# warn sleep 5; |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# Check that this request is good to go. |
|
137
|
966
|
100
|
|
|
|
1471
|
next if $domain_stats->{count}; |
|
138
|
933
|
100
|
100
|
|
|
2910
|
next unless time > ( $domain_stats->{next_send} || 0 ); |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# We can add this request. |
|
141
|
7
|
|
|
|
|
14
|
$domain_stats->{count}++; |
|
142
|
7
|
|
|
|
|
19
|
push @{ $self->{to_send} }, shift @{ $domain_stats->{to_send} }; |
|
|
7
|
|
|
|
|
11
|
|
|
|
7
|
|
|
|
|
27
|
|
|
143
|
|
|
|
|
|
|
} |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Use the original to send the requests on the queue. |
|
146
|
605
|
|
|
|
|
1615
|
return $self->SUPER::_process_to_send; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub _add_to_return_queue { |
|
150
|
7
|
|
|
7
|
|
6
|
my $self = shift; |
|
151
|
7
|
|
|
|
|
8
|
my $req_and_id = shift; |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# decrement the count for this domain so that another request can start. |
|
154
|
|
|
|
|
|
|
# Also set the interval so that we don't scrape too fast. |
|
155
|
7
|
|
|
|
|
10
|
my $id = $req_and_id->[1]; |
|
156
|
7
|
|
|
|
|
11
|
my $domain = $self->{id_opts}{$id}{_domain}; |
|
157
|
7
|
|
|
|
|
9
|
my $domain_stat = $self->{domain_stats}{$domain}; |
|
158
|
7
|
|
|
|
|
14
|
my $interval = $self->_get_opt( 'send_interval', $id ); |
|
159
|
|
|
|
|
|
|
|
|
160
|
7
|
|
|
|
|
10
|
$domain_stat->{count}--; |
|
161
|
7
|
|
|
|
|
20
|
$domain_stat->{next_send} = time + $interval; |
|
162
|
|
|
|
|
|
|
|
|
163
|
7
|
|
|
|
|
26
|
return $self->SUPER::_add_to_return_queue($req_and_id); |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
L - the module that this one is based on. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 AUTHOR |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Edmund von der Burg C<< >>. |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
L |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 LICENCE AND COPYRIGHT |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Copyright (c) 2006, Edmund von der Burg C<< >>. |
|
179
|
|
|
|
|
|
|
All rights reserved. |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under |
|
182
|
|
|
|
|
|
|
the same terms as Perl itself. |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head1 DISCLAIMER OF WARRANTY |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE |
|
187
|
|
|
|
|
|
|
SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE |
|
188
|
|
|
|
|
|
|
STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE |
|
189
|
|
|
|
|
|
|
SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, |
|
190
|
|
|
|
|
|
|
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
|
191
|
|
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND |
|
192
|
|
|
|
|
|
|
PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, |
|
193
|
|
|
|
|
|
|
YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR, OR CORRECTION. |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY |
|
196
|
|
|
|
|
|
|
COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE |
|
197
|
|
|
|
|
|
|
SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE LIABLE TO YOU FOR DAMAGES, |
|
198
|
|
|
|
|
|
|
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING |
|
199
|
|
|
|
|
|
|
OUT OF THE USE OR INABILITY TO USE THE SOFTWARE (INCLUDING BUT NOT LIMITED TO |
|
200
|
|
|
|
|
|
|
LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR |
|
201
|
|
|
|
|
|
|
THIRD PARTIES OR A FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER |
|
202
|
|
|
|
|
|
|
SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE |
|
203
|
|
|
|
|
|
|
POSSIBILITY OF SUCH DAMAGES. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=cut |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
1; |
|
208
|
|
|
|
|
|
|
|