| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# WWW::Automate (c) 2002 Kirrily Robert |
|
4
|
|
|
|
|
|
|
# This software is distributed under the same licenses as Perl; see |
|
5
|
|
|
|
|
|
|
# the file COPYING for details. |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# $Id: Automate.pm,v 1.1.1.1 2005/12/01 03:07:33 chezskud Exp $ |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package WWW::Automate; |
|
12
|
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
975
|
use HTTP::Request; |
|
|
1
|
|
|
|
|
23231
|
|
|
|
1
|
|
|
|
|
43
|
|
|
14
|
1
|
|
|
1
|
|
1090
|
use LWP::UserAgent; |
|
|
1
|
|
|
|
|
22771
|
|
|
|
1
|
|
|
|
|
34
|
|
|
15
|
1
|
|
|
1
|
|
1035
|
use HTML::Form; |
|
|
1
|
|
|
|
|
19006
|
|
|
|
1
|
|
|
|
|
73
|
|
|
16
|
1
|
|
|
1
|
|
1018
|
use HTML::TokeParser; |
|
|
1
|
|
|
|
|
12280
|
|
|
|
1
|
|
|
|
|
36
|
|
|
17
|
1
|
|
|
1
|
|
683
|
use Clone qw(clone); |
|
|
1
|
|
|
|
|
15732
|
|
|
|
1
|
|
|
|
|
99
|
|
|
18
|
1
|
|
|
1
|
|
12
|
use Carp; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
1593
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our @ISA = qw( LWP::UserAgent ); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $VERSION = $VERSION = "0.21"; |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $headers; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=pod |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 NAME |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
WWW::Automate - automate interaction with websites |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 NOTICE |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
B |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Please use WWW::Mechanize instead. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
use WWW::Automate; |
|
41
|
|
|
|
|
|
|
my $agent = WWW::Automate->new(); |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
$agent->get($url); |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
$agent->follow($link); |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$agent->form($number); |
|
48
|
|
|
|
|
|
|
$agent->field($name, $value); |
|
49
|
|
|
|
|
|
|
$agent->click($button); |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
$agent->back(); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
$agent->add_header($name => $value); |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
print "OK" if $agent->{content} =~ /$expected/; |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
This module is intended to help you automate interaction with a website. |
|
60
|
|
|
|
|
|
|
It bears a not-very-remarkable outwards resemblance to WWW::Chat, on |
|
61
|
|
|
|
|
|
|
which it is based. The main difference between this module and |
|
62
|
|
|
|
|
|
|
WWW::Chat is that WWW::Chat requires a pre-processing stage before you |
|
63
|
|
|
|
|
|
|
can run your script, whereas WWW::Automate does not. |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
WWW::Automate is a subclass of LWP::UserAgent, so anything you can do |
|
66
|
|
|
|
|
|
|
with an LWP::UserAgent, you can also do with this. See |
|
67
|
|
|
|
|
|
|
L for more information on the possibilities. |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 new() |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Creates and returns a new WWW::Automate object, hereafter referred to as |
|
72
|
|
|
|
|
|
|
the 'agent'. |
|
73
|
1
|
|
|
|
|
41
|
|
|
|
1
|
|
|
|
|
5
|
|
|
74
|
1
|
|
|
1
|
|
2
|
my $agent = WWW::Automate->new() |
|
|
1
|
|
|
|
|
21327
|
|
|
|
1
|
|
|
|
|
598
|
|
|
75
|
1
|
|
|
1
|
|
6
|
|
|
|
1
|
|
|
|
|
312
|
|
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
22
|
|
|
76
|
1
|
|
|
1
|
|
128
|
=begin testing |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
2605
|
|
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
BEGIN: { |
|
79
|
1
|
|
|
|
|
555
|
use lib qw(lib/); |
|
80
|
1
|
|
|
|
|
333
|
use_ok('WWW::Automate'); |
|
81
|
1
|
|
|
|
|
451
|
use vars qw($agent); |
|
82
|
1
|
|
|
|
|
395
|
} |
|
83
|
1
|
|
|
|
|
516
|
|
|
84
|
1
|
|
|
|
|
383
|
ok(WWW::Automate->can('new'), "can we call new?"); |
|
85
|
|
|
|
|
|
|
ok($agent = WWW::Automate->new(), "create agent object"); |
|
86
|
|
|
|
|
|
|
isa_ok($agent, 'WWW::Automate', "agent is a WWW::Automate"); |
|
87
|
|
|
|
|
|
|
can_ok($agent, 'request'); # as a subclass of LWP::UserAgent |
|
88
|
|
|
|
|
|
|
like($agent->agent(), qr/WWW-Automate/, "Set user agent string"); |
|
89
|
|
|
|
|
|
|
like($agent->agent(), qr/$WWW::Automate::VERSION/, "Set user agent version"); |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=end testing |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=cut |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
our $base = "http://localhost/"; |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub new { |
|
98
|
3
|
|
|
3
|
1
|
5
|
shift; |
|
99
|
3
|
|
|
|
|
20
|
warn "WWW::Automate is no longer maintained. Please use WWW::Mechanize instead.\n"; |
|
100
|
3
|
|
|
|
|
28
|
my $self = { page_stack => [] }; |
|
101
|
3
|
|
|
|
|
5
|
bless $self; |
|
102
|
3
|
|
|
|
|
25
|
$self->agent("WWW-Automate-$VERSION"); |
|
103
|
3
|
|
|
|
|
988
|
$self->env_proxy(); |
|
104
|
3
|
|
|
|
|
6906
|
return $self; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head2 $agent->get($url) |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Given a URL/URI, fetches it. |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The results are stored internally in the agent object, as follows: |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
uri The current URI |
|
114
|
|
|
|
|
|
|
req The current request object [HTTP::Request] |
|
115
|
|
|
|
|
|
|
res The response received [HTTP::Response] |
|
116
|
|
|
|
|
|
|
status The status code of the response |
|
117
|
|
|
|
|
|
|
ct The content type of the response |
|
118
|
|
|
|
|
|
|
base The base URI for current response |
|
119
|
|
|
|
|
|
|
content The content of the response |
|
120
|
|
|
|
|
|
|
forms Array of forms found in content [HTML::Form] |
|
121
|
|
|
|
|
|
|
form Current form [HTML::Form] |
|
122
|
1
|
|
|
|
|
384
|
links Array of links found in content |
|
|
1
|
|
|
|
|
10
|
|
|
123
|
1
|
|
|
|
|
412
|
|
|
124
|
1
|
|
|
|
|
422
|
You can get at them with, for example: $agent->{content} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=begin testing |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
ok($agent->get("http://google.com"), "Get google webpage"); |
|
129
|
|
|
|
|
|
|
isa_ok($agent->{uri}, "URI", "Set uri"); |
|
130
|
|
|
|
|
|
|
isa_ok($agent->{req}, 'HTTP::Request', "req should be a HTTP::Request"); |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=end testing |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub get { |
|
137
|
3
|
|
|
3
|
1
|
9
|
my ($self, $uri) = @_; |
|
138
|
3
|
|
|
|
|
25
|
$self->{uri} = URI->new_abs($uri, $base); |
|
139
|
3
|
|
|
|
|
10699
|
$self->{req} = HTTP::Request->new(GET => $uri); |
|
140
|
3
|
|
|
|
|
414
|
$self->do_request(); |
|
141
|
|
|
|
|
|
|
} |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head2 $agent->follow($string|$num) |
|
144
|
|
|
|
|
|
|
|
|
145
|
1
|
|
|
|
|
351
|
Follow a link. If you provide a string, the first link whose text |
|
|
1
|
|
|
|
|
7
|
|
|
146
|
1
|
|
|
|
|
319
|
matches that string will be followed. If you provide a number, it will |
|
147
|
1
|
|
|
|
|
665
|
be the nth link on the page. |
|
148
|
|
|
|
|
|
|
|
|
149
|
1
|
|
|
|
|
200
|
=begin testing |
|
150
|
1
|
|
|
|
|
202
|
|
|
151
|
1
|
|
|
|
|
406
|
ok(! $agent->follow(99999), "Can't follow too-high-numbered link"); |
|
152
|
|
|
|
|
|
|
ok($agent->follow(1), "Can follow first link"); |
|
153
|
|
|
|
|
|
|
ok($agent->back(), "Can go back"); |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
ok(! $agent->follow(qr/asdfghjksdfghj/), "Can't follow unlikely named link"); |
|
156
|
|
|
|
|
|
|
ok($agent->follow("Search"), "Can follow obvious named link"); |
|
157
|
|
|
|
|
|
|
$agent->back(); |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=end testing |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub follow { |
|
164
|
4
|
|
|
4
|
1
|
10
|
my ($self, $link) = @_; |
|
165
|
4
|
|
|
|
|
5
|
my @links = @{$self->{links}}; |
|
|
4
|
|
|
|
|
11
|
|
|
166
|
4
|
|
|
|
|
6
|
my $thislink; |
|
167
|
4
|
100
|
|
|
|
11
|
if (isnumber($link)) { |
|
168
|
2
|
50
|
|
|
|
7
|
if ($link <= $#links) { |
|
169
|
0
|
|
|
|
|
0
|
$thislink = $links[$link]; |
|
170
|
|
|
|
|
|
|
} else { |
|
171
|
2
|
|
|
|
|
18
|
warn "Link number $link is greater than maximum link $#links ", |
|
172
|
|
|
|
|
|
|
"on this page ($self->{uri})\n"; |
|
173
|
2
|
|
|
|
|
51
|
return undef; |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
} else { # user provided a regexp |
|
176
|
2
|
|
|
|
|
5
|
LINK: foreach my $l (@links) { |
|
177
|
2
|
50
|
|
|
|
20
|
if ($l->[1] =~ /$link/) { |
|
178
|
0
|
|
|
|
|
0
|
$thislink = $l; # grab first match |
|
179
|
0
|
|
|
|
|
0
|
last LINK; |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
} |
|
182
|
2
|
50
|
|
|
|
7
|
unless ($thislink) { |
|
183
|
2
|
|
|
|
|
9
|
warn "Can't find any link matching $link on this page ", |
|
184
|
|
|
|
|
|
|
"($self->{uri})\n"; |
|
185
|
2
|
|
|
|
|
33
|
return undef; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
} |
|
188
|
|
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
0
|
$thislink = $thislink->[0]; # we just want the URL, not the text |
|
190
|
|
|
|
|
|
|
|
|
191
|
0
|
|
|
|
|
0
|
$self->push_page_stack(); |
|
192
|
|
|
|
|
|
|
#print STDERR "thislink is $thislink, base is $self->{base}"; |
|
193
|
0
|
|
|
|
|
0
|
$self->{uri} = URI->new_abs($thislink, $self->{base}); |
|
194
|
0
|
|
|
|
|
0
|
$self->{req} = HTTP::Request->new(GET => $self->{uri}); |
|
195
|
0
|
|
|
|
|
0
|
$self->do_request(); |
|
196
|
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
0
|
return 1; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 $agent->form($number) |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Selects the Nth form on the page as the target for subsequent calls to |
|
203
|
1
|
|
|
|
|
2
|
field() and click(). Emits a warning and returns false if there is no |
|
|
1
|
|
|
|
|
6
|
|
|
204
|
1
|
|
|
|
|
10
|
such form. Forms are indexed from 1, that is to say, the first form is |
|
205
|
1
|
|
|
|
|
14
|
number 1 (not zero). |
|
206
|
1
|
|
|
|
|
666
|
|
|
207
|
1
|
|
|
|
|
263
|
=begin testing |
|
208
|
1
|
|
|
|
|
284
|
|
|
209
|
|
|
|
|
|
|
my $t = WWW::Automate->new(); |
|
210
|
|
|
|
|
|
|
$t->get("http://google.com"); |
|
211
|
|
|
|
|
|
|
ok($t->form(1), "Can select the first form"); |
|
212
|
|
|
|
|
|
|
is($t->{form}, $t->{forms}->[0], "Set the form attribute"); |
|
213
|
|
|
|
|
|
|
ok(! $t->form(99), "Can't select the 99th form"); |
|
214
|
|
|
|
|
|
|
is($t->{form}, $t->{forms}->[0], "Form is still set to 1"); |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=end testing |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=cut |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
sub form { |
|
221
|
2
|
|
|
2
|
1
|
5
|
my ($self, $form) = @_; |
|
222
|
2
|
50
|
|
|
|
9
|
if ($self->{forms}->[$form-1]) { |
|
223
|
0
|
|
|
|
|
0
|
$self->{form} = $self->{forms}->[$form-1]; |
|
224
|
0
|
|
|
|
|
0
|
return 1; |
|
225
|
|
|
|
|
|
|
} else { |
|
226
|
2
|
|
|
|
|
480
|
carp "There is no form number $form"; |
|
227
|
2
|
|
|
|
|
109
|
return 0; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 $agent->field($name, $value, $number) |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Given the name of a field, set its value to the value specified. This |
|
234
|
|
|
|
|
|
|
applies to the current form (as set by the form() method or defaulting |
|
235
|
|
|
|
|
|
|
to the first form on the page). |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
The optional $number parameter is used to distinguish between two fields |
|
238
|
|
|
|
|
|
|
with the same name. The fields are numbered from 1. |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub field { |
|
243
|
1
|
|
|
1
|
1
|
3
|
my ($self, $name, $value, $number) = @_; |
|
244
|
1
|
|
50
|
|
|
10
|
$number ||= 1; |
|
245
|
1
|
50
|
|
|
|
6
|
if ($number > 1) { |
|
246
|
0
|
|
|
|
|
0
|
$form->find_input($name, $number)->value($value); |
|
247
|
|
|
|
|
|
|
} else { |
|
248
|
1
|
|
|
|
|
315
|
$self->{form}->value($name => $value); |
|
249
|
|
|
|
|
|
|
} |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 $agent->click($button, $x, $y); |
|
253
|
|
|
|
|
|
|
|
|
254
|
1
|
|
|
|
|
290
|
Has the effect of clicking a button on a form. This method takes an |
|
|
1
|
|
|
|
|
7
|
|
|
255
|
1
|
|
|
|
|
6
|
optional method which is the name of the button to be pressed. If there |
|
256
|
1
|
|
|
|
|
8
|
is only one button on the form, it simply clicks that one button. |
|
257
|
0
|
|
|
|
|
0
|
|
|
258
|
0
|
|
|
|
|
0
|
=begin testing |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
my $t = WWW::Automate->new(); |
|
261
|
|
|
|
|
|
|
$t->get("http://google.com"); |
|
262
|
|
|
|
|
|
|
$t->field(q => "foo"); |
|
263
|
|
|
|
|
|
|
ok($t->click("btnG"), "Can click 'btnG' ('Google Search' button)"); |
|
264
|
|
|
|
|
|
|
like($t->{content}, qr/foo\s?fighters/i, "Found 'Foo Fighters'"); |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=end testing |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
sub click { |
|
271
|
0
|
|
|
0
|
1
|
0
|
my ($self, $button, $x, $y) = @_; |
|
272
|
0
|
0
|
|
|
|
0
|
for ($x, $y) { $_ = 1 unless defined; } |
|
|
0
|
|
|
|
|
0
|
|
|
273
|
0
|
|
|
|
|
0
|
$self->push_page_stack(); |
|
274
|
0
|
|
|
|
|
0
|
$self->{uri} = $self->{form}->uri; |
|
275
|
0
|
|
|
|
|
0
|
$self->{req} = $self->{form}->click($name, $x, $y); |
|
276
|
0
|
|
|
|
|
0
|
$self->do_request(); |
|
277
|
|
|
|
|
|
|
} |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head2 $agent->submit() |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
Shortcut for $a->click("submit") |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=cut |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub submit { |
|
286
|
0
|
|
|
0
|
1
|
0
|
my ($self) = shift; |
|
287
|
0
|
|
|
|
|
0
|
$self->click("submit"); |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 $agent->back(); |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
The equivalent of hitting the "back" button in a browser. Returns to |
|
293
|
|
|
|
|
|
|
the previous page. Won't go back past the first page. |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=cut |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub back { |
|
298
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
299
|
2
|
|
|
|
|
4
|
$self->pop_page_stack; |
|
300
|
|
|
|
|
|
|
} |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=head2 $agent->add_header(name => $value) |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
Sets a header for the WWW::Automate agent to use every time it gets a |
|
305
|
|
|
|
|
|
|
webpage. This is *NOT* stored in the agent object (because if it were, |
|
306
|
|
|
|
|
|
|
it would disappear if you went back() past where you'd set it) but in |
|
307
|
|
|
|
|
|
|
the hash variable %WWW::Automate::headers, which is a hashref of all headers |
|
308
|
0
|
|
|
|
|
0
|
to be set. You can manipulate this directly if you want to; the |
|
|
0
|
|
|
|
|
0
|
|
|
309
|
0
|
|
|
|
|
0
|
add_header() method is just provided as a convenience function for the most |
|
310
|
|
|
|
|
|
|
common case of adding a header. |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=begin testing |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
$agent->add_header(foo => 'bar'); |
|
315
|
|
|
|
|
|
|
is($WWW::Automate::headers{'foo'}, 'bar', "set header"); |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=end testing |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=cut |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub add_header { |
|
322
|
0
|
|
|
0
|
1
|
0
|
my ($self, $name, $value) = @_; |
|
323
|
0
|
|
|
|
|
0
|
$WWW::Automate::headers{$name} = $value; |
|
324
|
|
|
|
|
|
|
} |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
These methods are only used internally. You probably don't need to |
|
329
|
|
|
|
|
|
|
know about them. |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 push_page_stack() |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head2 pop_page_stack() |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
The agent keeps a stack of visited pages, which it can pop when it needs |
|
336
|
|
|
|
|
|
|
to go BACK and so on. |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
The current page needs to be pushed onto the stack before we get a new |
|
339
|
|
|
|
|
|
|
page, and the stack needs to be popped when BACK occurs. |
|
340
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
341
|
0
|
|
|
|
|
0
|
Neither of these take any arguments, they just operate on the $agent |
|
342
|
0
|
|
|
|
|
0
|
object. |
|
|
0
|
|
|
|
|
0
|
|
|
343
|
0
|
|
|
|
|
0
|
|
|
344
|
0
|
|
|
|
|
0
|
=begin testing |
|
|
0
|
|
|
|
|
0
|
|
|
345
|
0
|
|
|
|
|
0
|
|
|
346
|
0
|
|
|
|
|
0
|
my $t = WWW::Automate->new(); |
|
|
0
|
|
|
|
|
0
|
|
|
347
|
0
|
|
|
|
|
0
|
$t->get("http://www.google.com"); |
|
348
|
0
|
|
|
|
|
0
|
is(scalar @{$t->{page_stack}}, 0, "Page stack starts empty"); |
|
|
0
|
|
|
|
|
0
|
|
|
349
|
0
|
|
|
|
|
0
|
$t->push_page_stack(); |
|
350
|
0
|
|
|
|
|
0
|
is(scalar @{$t->{page_stack}}, 1, "Pushed item onto page stack"); |
|
|
0
|
|
|
|
|
0
|
|
|
351
|
0
|
|
|
|
|
0
|
$t->push_page_stack(); |
|
352
|
0
|
|
|
|
|
0
|
is(scalar @{$t->{page_stack}}, 2, "Pushed item onto page stack"); |
|
|
0
|
|
|
|
|
0
|
|
|
353
|
|
|
|
|
|
|
$t->pop_page_stack(); |
|
354
|
|
|
|
|
|
|
is(scalar @{$t->{page_stack}}, 1, "Popped item from page stack"); |
|
355
|
|
|
|
|
|
|
$t->pop_page_stack(); |
|
356
|
|
|
|
|
|
|
is(scalar @{$t->{page_stack}}, 0, "Popped item from page stack"); |
|
357
|
|
|
|
|
|
|
$t->pop_page_stack(); |
|
358
|
|
|
|
|
|
|
is(scalar @{$t->{page_stack}}, 0, "Can't pop beyond end of page stack"); |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=end testing |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=cut |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
sub push_page_stack { |
|
366
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
367
|
0
|
|
|
|
|
0
|
$self->{page_stack} = [ @{$self->{page_stack}}, clone($self)]; |
|
|
0
|
|
|
|
|
0
|
|
|
368
|
0
|
|
|
|
|
0
|
return 1; |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub pop_page_stack { |
|
372
|
2
|
|
|
2
|
1
|
3
|
my $self = shift; |
|
373
|
2
|
50
|
|
|
|
3
|
if (@{$self->{page_stack}}) { |
|
|
2
|
|
|
|
|
7
|
|
|
374
|
0
|
|
|
|
|
0
|
$self = pop @{$self->{page_stack}}; |
|
|
0
|
|
|
|
|
0
|
|
|
375
|
0
|
|
|
|
|
0
|
bless $self; |
|
376
|
|
|
|
|
|
|
} |
|
377
|
2
|
|
|
|
|
6
|
return 1; |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 extract_links() |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Extracts HREF links from the content of a webpage. |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
sub extract_links { |
|
387
|
3
|
|
|
3
|
1
|
40
|
my $self = shift; |
|
388
|
3
|
|
|
|
|
18
|
my $p = HTML::TokeParser->new(\$self->{content}); |
|
389
|
3
|
|
|
|
|
503
|
my @links; |
|
390
|
|
|
|
|
|
|
|
|
391
|
3
|
|
|
|
|
14
|
while (my $token = $p->get_tag("a", "frame")) { |
|
392
|
3
|
50
|
|
|
|
937
|
my $url = $token->[0] eq 'a' ? $token->[1]{href} : $token->[1]{src}; |
|
393
|
3
|
50
|
|
|
|
13
|
next unless defined $url; # probably just a name link |
|
394
|
3
|
50
|
|
|
|
19
|
my $text = $token->[0] eq 'a' ? |
|
395
|
|
|
|
|
|
|
$p->get_trimmed_text("/a"):$token->[1]{name}; |
|
396
|
3
|
|
|
|
|
216
|
push(@links, [$url => $text]); |
|
397
|
|
|
|
|
|
|
} |
|
398
|
3
|
|
|
|
|
282
|
return \@links; |
|
399
|
|
|
|
|
|
|
} |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=head2 do_request() |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
Actually performs a request on the $self->{req} request object, and sets |
|
404
|
|
|
|
|
|
|
a bunch of attributes on $self. |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=cut |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
sub do_request { |
|
409
|
3
|
|
|
3
|
1
|
6
|
my ($self) = @_; |
|
410
|
3
|
|
|
|
|
14
|
foreach my $h (keys %WWW::Automate::headers) { |
|
411
|
0
|
|
|
|
|
0
|
$self->{req}->header( $h => $WWW::Automate::headers{$h} ); |
|
412
|
|
|
|
|
|
|
} |
|
413
|
3
|
|
|
|
|
22
|
$self->{res} = $self->request($self->{req}); |
|
414
|
3
|
|
|
|
|
176416
|
$self->{status} = $self->{res}->code; |
|
415
|
3
|
|
|
|
|
46
|
$self->{base} = $self->{res}->base; |
|
416
|
3
|
|
50
|
|
|
1707
|
$self->{ct} = $self->{res}->content_type || ""; |
|
417
|
3
|
|
|
|
|
156
|
$self->{content} = $self->{res}->content; |
|
418
|
|
|
|
|
|
|
|
|
419
|
3
|
50
|
|
|
|
59
|
if ($self->{ct} eq 'text/html') { |
|
420
|
3
|
|
|
|
|
28
|
$self->{forms} = [ HTML::Form->parse($self->{content}, $self->{res}->base) ]; |
|
421
|
3
|
50
|
|
|
|
3424
|
$self->{form} = $self->{forms}->[0] if @{$self->{forms}}; |
|
|
3
|
|
|
|
|
16
|
|
|
422
|
3
|
|
|
|
|
16
|
$self->{links} = $self->extract_links(); |
|
423
|
|
|
|
|
|
|
} |
|
424
|
|
|
|
|
|
|
} |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
sub isnumber { |
|
427
|
4
|
|
|
4
|
0
|
6
|
my $in = shift; |
|
428
|
4
|
100
|
|
|
|
17
|
if ($in =~ /^\d+$/) { |
|
429
|
2
|
|
|
|
|
17
|
return 1; |
|
430
|
|
|
|
|
|
|
} else { |
|
431
|
2
|
|
|
|
|
4
|
return 0; |
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=head1 BUGS |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Please report any bugs via the system at http://rt.cpan.org/ |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=head1 AUTHOR |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
Kirrily "Skud" Robert |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=cut |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
1; |