File Coverage

blib/lib/WWW/Agent.pm
Criterion Covered Total %
statement 90 100 90.0
branch 8 16 50.0
condition 4 7 57.1
subroutine 17 18 94.4
pod 1 2 50.0
total 120 143 83.9


line stmt bran cond sub pod time code
1             package WWW::Agent;
2              
3 3     3   231115 use strict;
  3         8  
  3         117  
4 3     3   18 use warnings;
  3         6  
  3         101  
5 3     3   28 use Data::Dumper;
  3         6  
  3         171  
6 3     3   1009 use POE;
  3         59336  
  3         25  
7              
8             =pod
9              
10             =head1 NAME
11              
12             WWW::Agent - Abstract web browser
13              
14             =head1 SYNOPSIS
15              
16             use WWW::Agent
17             use WWW::Agent::Plugins::Focus;
18             ...
19             $agent = new WWW::Agent (plugins => [
20             new WWW::Agent::Plugins::Focus,
21             ...
22             ]);
23             $agent->run;
24              
25             =head1 DESCRIPTION
26              
27             The web agent is a minimalistic web browser, in that the only thing it
28             supports is to request an object. For this purpose, it maintains a
29             concept of I, similar to those found in most modern web
30             browsers. A request will be done in the context of a particular tab.
31             As a consequence, the agents can handle multiple requests, also
32             concurrently. This is achieved by using L underneath.
33              
34             As the agent is otherwise dumb, it is up to plugins to do something
35             useful. The range of possible features which plugins can add is
36             considerable: Plugins can take care off testing websites for correct
37             behaviour, scraping web sites and lauching external function,
38             spidering sites to analyze pages or link structures, etc.
39              
40             =head2 Plugin Events
41              
42             See L.
43              
44             =head1 INTERFACE
45              
46             =head2 Constructor
47              
48             The constructor expects a hash with the following key/value pairs:
49              
50             =over
51              
52             =item C (string):
53              
54             This is currently ignored.
55              
56             =item C (list reference, optional):
57              
58             Any number of plugins can be loaded into an agent. Each plugin must
59             be an object (instantiated from the appropriate class).
60              
61             The sequence of plugins in the list is significant as two or more
62             plugins might register for one and the same event. The execution of
63             the individual handler is organized according to the list.
64              
65             =item C (L object, optional):
66              
67             This object will be used to launch requests. Obviously any subclass
68             can be used, say, for providing special headers or one providing
69             additional caching.
70              
71             If no LWP::UserAgent object is passed in, the a generic one will be
72             used.
73              
74             =back
75              
76             =cut
77              
78             sub _filter {
79 8     8   11 my $kernel = shift;
80 8         12 my $heap = shift;
81 8         14 my $policies = shift;
82 8         10 my $tab = shift;
83 8         14 my $value = shift;
84              
85             #warn "policies ".Dumper $policies;
86              
87 8         35 foreach my $p (@$policies) {
88             #warn " in _compute before one code value=$value";
89 4         10 $value = &{$p} ($kernel, $heap, $tab, $value, @_);
  4         18  
90             }
91             #warn " in _compute $value";
92 8         99 return $value;
93             }
94              
95             sub _ok {
96 11     11   194 my $kernel = shift;
97 11         22 my $heap = shift;
98 11         18 my $policies = shift;
99              
100 11 50       39 return 1 unless $policies;
101 11         33 foreach (@$policies) {
102 2 50       14 &$_ ($kernel, $heap, @_) or die "not satisfied";
103             }
104 11         10661 return 1;
105             }
106              
107             sub new {
108 3     3 0 2493 my $class = shift;
109 3         13 my %options = @_;
110 3         11 my $self = bless {}, $class;
111              
112 3   50     40 $self->{name} = delete $options{name} || 'agent';
113 3   100     20 $self->{plugins} = delete $options{plugins} || [];
114              
115 3     3   156169 use LWP::UserAgent;
  3         69506  
  3         3971  
116 3   33     26 $self->{ua} = delete $options{ua} || LWP::UserAgent->new;
117              
118             POE::Session->create (
119             inline_states => {
120             _start => sub {
121 3     3   1344 my ($kernel, $heap) = @_[KERNEL, HEAP];
122 3         14 my ($plugins) = $_[ARG0];
123              
124 3         15 $kernel->alias_set ('agent');
125 3         169 $heap->{ua} = $self->{ua};
126              
127 3         29 $heap->{policies} = {
128             init => [],
129             cycle_consider => [],
130             cycle_prepare_request => [],
131             # cycle_initiate_request => [],
132             # cycle_analyze_response => [],
133             cycle_pos_response => [],
134             cycle_neg_response => [],
135             cycle_complete => [],
136             };
137              
138 3         9 foreach my $plugin (@$plugins) {
139 2         14 my $ns = $plugin->{namespace}; # reserving namespace
140 2 50       10 die "duplicate namespace '$ns'" if $heap->{$ns};
141 2         16 $heap->{$ns} = {};
142              
143 2 50       9 if ($plugin->{depends}) {
144 0         0 foreach (@{$plugin->{depends}}) {
  0         0  
145 0 0       0 die "plugin '$ns' depends on other plugin '$_' but that has not been loaded" unless $heap->{$_};
146             }
147             }
148              
149 2         4 foreach my $policy_group (keys %{$plugin->{hooks}}) {
  2         8  
150 6         32 my $policy = $plugin->{hooks}->{$policy_group};
151              
152 6 100       19 if ($heap->{policies}->{$policy_group}) { # policy group already exists
153 4         6 push @{$heap->{policies}->{$policy_group}}, $policy;
  4         14  
154             } else { # no policy group existed, lets make one
155 2         20 $kernel->state ( $policy_group, $policy );
156             }
157             }
158             }
159              
160 3         35 _ok ($kernel, $heap, $heap->{policies}->{init});
161             },
162            
163             cycle_start => sub {
164 4     4   1377 my ($kernel, $heap) = @_[KERNEL, HEAP];
165 4         18 my ($tab, $request) = @_[ARG0, ARG1];
166 4         25 $heap->{tabs}->{$tab}->{request} = $request;
167 4         21 $kernel->yield ('cycle_consider', $tab);
168             },
169             cycle_consider => sub {
170 4     4   705 my ($kernel, $heap) = @_[KERNEL, HEAP];
171 4         10 my ($tab) = $_[ARG0];
172              
173 4 50       26 if (_ok ($kernel, $heap, $heap->{policies}->{cycle_consider}, $tab, $heap->{tabs}->{$tab}->{request})) {
174 4         16 $kernel->yield ('cycle_prepare_request', $tab);
175             } else {
176 0         0 $kernel->yield ('cycle_complete', $tab);
177             }
178             },
179             cycle_prepare_request => sub {
180 4     4   823 my ($kernel, $heap) = @_[KERNEL, HEAP];
181 4         10 my ($tab) = $_[ARG0];
182 4         10 my $htab = $heap->{tabs}->{$tab};
183              
184 4         42 $htab->{request} = _filter ($kernel, $heap, $heap->{policies}->{cycle_prepare_request}, $tab, $htab->{request});
185 4         15 $kernel->yield ('cycle_initiate_request', $tab);
186             },
187             cycle_initiate_request => sub {
188 4     4   831 my ($kernel, $heap) = @_[KERNEL, HEAP];
189 4         11 my ($tab) = $_[ARG0];
190 4         11 my $htab = $heap->{tabs}->{$tab};
191              
192 4         59 $htab->{response} = $heap->{ua}->request ($htab->{request});
193 4         689 $kernel->yield ('cycle_analyze_response', $tab);
194             },
195             cycle_analyze_response => sub {
196 4     4   747 my ($kernel, $heap) = @_[KERNEL, HEAP];
197 4         10 my ($tab) = $_[ARG0];
198 4         14 my $htab = $heap->{tabs}->{$tab};
199              
200 4 50       22 if ($htab->{response}->is_success) {
201             # $htab->{current_url} = $htab->{response}->request->uri;
202 4         47 $kernel->yield ('cycle_pos_response', $tab);
203             } else {
204 0         0 $kernel->yield ('cycle_neg_response', $tab);
205             }
206             },
207             cycle_pos_response => sub {
208 4     4   1185 my ($kernel, $heap) = @_[KERNEL, HEAP];
209 4         9 my ($tab) = $_[ARG0];
210 4         12 my $htab = $heap->{tabs}->{$tab};
211              
212 4         19 $htab->{response} = _filter ($kernel, $heap, $heap->{policies}->{cycle_pos_response}, $tab, $htab->{response});
213 4         15 $kernel->yield ('cycle_complete', $tab);
214             },
215             cycle_neg_response => sub {
216 0     0   0 my ($kernel, $heap) = @_[KERNEL, HEAP];
217 0         0 my ($tab) = $_[ARG0];
218 0         0 my $htab = $heap->{tabs}->{$tab};
219              
220 0         0 $htab->{response} = _filter ($kernel, $heap, $heap->{policies}->{cycle_neg_response}, $tab, $htab->{response});
221 0         0 $kernel->yield ('cycle_complete', $tab);
222             },
223             cycle_complete => sub {
224 4     4   1263 my ($kernel, $heap) = @_[KERNEL, HEAP];
225 4         9 my ($tab) = $_[ARG0];
226              
227 4         19 _ok ($kernel, $heap, $heap->{policies}->{cycle_complete}, $tab);
228             },
229             },
230 3         144 args => [ $self->{plugins} ],
231             );
232 3         480 return $self;
233             }
234              
235             =pod
236              
237             =head2 Methods
238              
239             =over
240              
241             =item C (no parameters)
242              
243             This method makes the agent run and do whatever it is told to do. If
244             you have not posted any requests to C before that, then
245             the agent will immediately terminate.
246              
247             Consequently it is either your responsibility to task the agent with
248             requests, or the responsibility of specific plugins to do that. One
249             other option is to set up another L session which posts events to
250             the agent (it's POE name is C, btw).
251              
252             Example:
253              
254             my $a = new WWW::Agent (....);
255             use POE;
256             POE::Kernel->post ('agent', 'cycle_start', 'new_tab', 'http://www.example.org/');
257             $a->run; # fetch it and ... that's it
258              
259             =cut
260              
261             sub run {
262 3     3 1 14779 my $self = shift;
263 3         20 POE::Kernel->run();
264             }
265              
266             =pod
267              
268             =back
269              
270             =head1 SEE ALSO
271              
272             L, L, L
273              
274             =head1 AUTHOR
275              
276             Robert Barta, Erho@bigpond.net.auE
277              
278             =head1 COPYRIGHT AND LICENSE
279              
280             Copyright (C) 2005 by Robert Barta
281              
282             This library is free software; you can redistribute it and/or modify
283             it under the same terms as Perl itself, either Perl version 5.8.4 or,
284             at your option, any later version of Perl 5 you may have available.
285              
286             =cut
287              
288             our $VERSION = '0.03';
289             our $REVISION = '$Id: Agent.pm,v 1.3 2005/03/19 10:01:15 rho Exp $';
290              
291             1;
292              
293              
294             __END__