File Coverage

blib/lib/WWW/Mechanize/Plugin/AutoWrite.pm
Criterion Covered Total %
statement 59 79 74.6
branch 7 18 38.8
condition 1 7 14.2
subroutine 17 18 94.4
pod 2 3 66.6
total 86 125 68.8


line stmt bran cond sub pod time code
1             package WWW::Mechanize::Plugin::AutoWrite;
2              
3             =head1 NAME
4              
5             WWW::Mechanize::Plugin::AutoWrite - WWW::Mechanize plugin that writes the fetched pages to the disk.
6              
7             =head1 SYNOPSIS
8              
9             use WWW::Mechanize;
10             use WWW::Mechanize::Plugin::AutoWrite;
11            
12             my $mech = WWW::Mechanize->new();
13             $mech->autowrite->file('/tmp/mech.html');
14            
15             $mech->get('http://search.cpan.org/');
16             # now the contents of the page is written to /tmp/mech.html
17              
18             or:
19              
20             my $mech = WWW::Mechanize->new();
21             $mech->autowrite->dir('/tmp/mech/');
22            
23             $mech->get('http://search.cpan.org/');
24             # now the contents of the page are written to /tmp/mech/001.html and the HTTP
25             # transaction is logged into /tmp/mech/001.http
26              
27             $mech->submit_form(
28             'form_name' => 'f',
29             'fields' => {
30             'query' => 'WWW::Mechanize::Plugin::AutoWrite',
31             'mode' => 'module',
32             },
33             );
34             # Now the pages are saved into /tmp/mech/002.html and /tmp/mech/002.http
35              
36              
37             or:
38              
39             my $mech = WWW::Mechanize->new();
40             $mech->autowrite->dir('/tmp/mech/'); # Save the whole session
41             $mech->autowrite->file('/tmp/mech/last.html'); # Save the last page in a file
42            
43             $mech->get('http://www.wikipedia.com/');
44             # now the contents of the page are written both to /tmp/mech/001.html and
45             # /tmp/mech/last.html
46             $mech->follow_link(text => 'Galego');
47              
48             =head1 DESCRIPTION
49              
50             L overrides the method
51             L with a custom version that records all HTTP
52             transactions into the disk. This has for effect that every time that a new HTTP
53             request is made (GET, POST, etc) the contents of the page returned by the server
54             and the HTTP transaction (the request and the response) are stored into local
55             file(s) on disk.
56              
57             If no destination file/folder is provided then this module will act as a noop
58             and nothing will be written to the disk. It's also possible to provide both a
59             file and a folder in order to have the HTTP session and the last page saved
60             simultaneously.
61              
62             =head1 RATIONALE
63              
64             The idea is to have the static page loaded into a web browser and to reload the
65             page as needed. A better idea is to use a browser that has a builtin mechanism
66             for monitoring changes to local files. The I web browser does this
67             automatically once a page is loaded through the procotol C
68              
69             Another reason for the existence of this module is to be able to trace an HTTP
70             transaction in order to debug better I operations performed on
71             complex web sites.
72              
73             =head1 ATTRIBUTES
74              
75             This module can be configured through the attributes enumerated here.
76              
77             =head2 file
78              
79             Get/set the name of the file where the last page downloaded (the content's of
80             the HTTP response) will be saved.
81              
82             Set this attribute to a false value in order to disable the saving of the last
83             page downloaded.
84              
85             =head2 dir
86              
87             Get/set the name of the folder where the HTTP session (the content's of the HTTP
88             response as well as the HTTP headers) will be saved.
89              
90             Set this attribute to a false value in order to disable the saving of the HTTP
91             session.
92              
93             =head2 counter
94              
95             Get/set the counter used no name each file with a unique name when saving the
96             HTTP session (the counter is used only when L is set).
97              
98              
99             It can be useful to reset the counter when multiple sessions need to be saved
100             into different folders.
101              
102             foreach my $entry (@entries) {
103             $mech->autowrite->dir("/tmp/$entry/");
104             $mech->autowrite->counter(0);
105             # Complex mechanize
106             mechanize_process($mech, $entry);
107             }
108              
109             =cut
110              
111              
112 1     1   396477 use 5.006;
  1         4  
  1         43  
113 1     1   6 use strict;
  1         1  
  1         34  
114 1     1   8 use warnings;
  1         2  
  1         56  
115              
116             our $VERSION = '0.06';
117              
118 1     1   6 use File::Slurp qw{ write_file };
  1         3  
  1         71  
119 1     1   8 use File::Path qw{ mkpath };
  1         2  
  1         53  
120 1     1   5 use File::Spec;
  1         3  
  1         34  
121 1     1   6 use File::Basename qw{ fileparse };
  1         2  
  1         196  
122              
123 1     1   841 use MIME::Types;
  1         6541  
  1         49  
124              
125 1     1   11 use base qw(Class::Accessor::Fast);
  1         3  
  1         1069  
126             __PACKAGE__->mk_accessors(
127             qw(
128             file
129             dir
130             counter
131             )
132             );
133              
134              
135             # We need a reference to the original method used by mechanize for the requests.
136             my $REQUEST_SUB;
137             BEGIN {
138 1     1   4702 $REQUEST_SUB = \&WWW::Mechanize::request;
139             }
140              
141              
142             # MIME types lookup
143             my $MIME_TYPES = MIME::Types->new();
144              
145              
146             =head1 METHODS
147              
148             This module offers the following public methods:
149              
150             =cut
151              
152              
153             #
154             # Returns the next iteration of the counter. This method initializes the counter
155             # the first time it's invoked.
156             #
157             sub _inc_counter {
158 0     0   0 my $self = shift;
159            
160 0 0       0 if (! defined $self->{'counter'}) {
161 0         0 $self->{'counter'} = 0;
162             }
163            
164 0         0 return ++$self->{'counter'};
165             }
166              
167              
168             =head2 write_to_file
169              
170             This method writes the HTTP requests into a file and/or a folder. It's called
171             automatically by mechanize once the plugin is loaded and configured.
172              
173             =cut
174              
175             sub write_to_file {
176 3     3 1 7 my $self = shift;
177 3         6 my ($mech, $request, $response) = @_;
178              
179              
180             # write to a single file if autowrite is set
181 3 50       16 if (my $filename = $self->file) {
182             # Make sure that the path to the file exists
183 3         97 my (undef, $folder) = fileparse($filename);
184 3         190 mkpath($folder);
185            
186 3         16 write_file($filename, $mech->content);
187             }
188              
189              
190             # write to multiple files in a folder if autowritedir is set
191 3 50       1155 if (my $foldername = $self->dir) {
192 0         0 mkpath($foldername); # works fine with already existing folders
193             # my $encoding = $response->content_encoding;
194              
195 0         0 my $counter = $self->_inc_counter();
196 0         0 my $file;
197            
198             # Get the extension of the file based on the mime-type
199 0   0     0 my $mime = $MIME_TYPES->type($response->content_type || 'text/plain');
200 0 0       0 my ($extension) = defined $mime ? $mime->extensions : ('txt');
201              
202              
203              
204             # Write the contents of the page
205 0         0 $file = File::Spec->catfile(
206             $foldername,
207             sprintf "%03d.%s", $counter, $extension
208             );
209 0         0 write_file($file, $mech->content);
210              
211            
212             # Remember that the response has the document body which we don't want at
213             # this point. So let's clone the response and get rid of the request's body.
214 0         0 $response = $response->clone();
215 0         0 $response->content(undef);
216              
217             # Write the HTTP transaction (request "\n" response)
218 0         0 $file = File::Spec->catfile(
219             $foldername,
220             sprintf "%03d.http", $counter
221             );
222            
223 0         0 write_file($file, $request->as_string, "\n", $response->as_string);
224             }
225             }
226              
227              
228             #
229             # NOTE: We are injecting methods into WWW::Mechanize it's evil but it's the only
230             # way for a plugin to work.
231             #
232             package WWW::Mechanize;
233              
234 1     1   11 use Scalar::Util qw{ blessed };
  1         2  
  1         62  
235 1     1   6 use Carp;
  1         2  
  1         60  
236              
237             =head1 WWW::Mechanize::request
238              
239             The method L is overriden by a custom version that will
240             invoke the original L and then record the request.
241              
242             =cut
243              
244             {
245              
246 1     1   6 no warnings qw{ redefine };
  1         2  
  1         322  
247              
248             sub request {
249 3     3 1 5596 my $self = shift;
250 3         7 my ($request, @args) = @_;
251              
252             # Perform the actual HTTP request
253 3         13 my $response = $REQUEST_SUB->($self, $request, @args);
254              
255            
256             # Write the request, response and contents
257 3 50       891265 if (exists $self->{autowrite}) {
258            
259 3         10 my $autowrite = $self->{autowrite};
260 3 50       11 if (_is_an_autowrite($autowrite)) {
261 3         15 $autowrite->write_to_file($self, $request, $response);
262             }
263             else {
264 0   0     0 croak "Wrong type for member 'autowrite', got ", ref($autowrite) || 'a scalar';
265             }
266             }
267            
268 3         41 return $response;
269             }
270              
271             }
272              
273              
274             =head1 WWW::Mechanize::autowrite
275              
276             This accessor returns the autowrite instance associated with this mechanize
277             instance. The first time that this method is invoked it will create
278              
279             =cut
280              
281             sub autowrite {
282 5     5 0 4212 my $self = shift;
283              
284             # The first time that this accessor is invoked
285            
286             # set
287 5 50       14 if (@_) {
288 0         0 my $autowrite = shift;
289              
290 0 0       0 if (! _is_an_autowrite($autowrite)) {
291 0         0 croak "Parameter must be an instance of WWW::Mechanize::Plugin::AutoWrite";
292             }
293              
294 0         0 $self->{'autowrite'} = $autowrite;
295             }
296             # get
297             else {
298 5 100       29 if (! exists $self->{'autowrite'}) {
299             # Create an autowrite instance on the fly
300 2         20 $self->{'autowrite'} = WWW::Mechanize::Plugin::AutoWrite->new();
301             }
302             }
303            
304 5         76 return $self->{'autowrite'};
305             }
306              
307              
308             #
309             # Returns true if the parameter is an instance of
310             # WWW::Mechanize::Plugin::AutoWrite.
311             #
312             sub _is_an_autowrite {
313 3     3   27 my ($autowrite) = @_;
314             return
315 3   33     61 blessed($autowrite)
316             && $autowrite->isa('WWW::Mechanize::Plugin::AutoWrite')
317             ;
318             }
319              
320              
321             1;
322              
323             =head1 COMPATIBILITY
324              
325             The version 0.04 has a different API and is not backward compatible. This
326             affects only the configuration of the plugin. The behaviour should be the same.
327              
328             =head1 SEE ALSO
329              
330             L
331              
332             =head1 AUTHOR
333              
334             Jozef Kutej, Ejkutej@cpan.orgE,
335              
336             Emmanuel Rodriguez, Epotyl@cpan.orgE
337              
338             =head1 COPYRIGHT AND LICENSE
339              
340             Copyright (C) 2007 by Jozef Kutej
341             Copyright (C) 2008 by Jozef Kutej, Emmanuel Rodriguez
342              
343             This library is free software; you can redistribute it and/or modify
344             it under the same terms as Perl itself, either Perl version 5.8.8 or,
345             at your option, any later version of Perl 5 you may have available.
346              
347             =cut