File Coverage

blib/lib/HTTP/Proxy/BodyFilter/save.pm
Criterion Covered Total %
statement 90 92 97.8
branch 46 50 92.0
condition 15 16 93.7
subroutine 13 13 100.0
pod 5 5 100.0
total 169 176 96.0


line stmt bran cond sub pod time code
1             package HTTP::Proxy::BodyFilter::save;
2             $HTTP::Proxy::BodyFilter::save::VERSION = '0.304';
3 3     3   16277 use strict;
  3         4  
  3         88  
4 3     3   429 use HTTP::Proxy;
  3         4  
  3         100  
5 3     3   420 use HTTP::Proxy::BodyFilter;
  3         5  
  3         63  
6 3     3   13 use vars qw( @ISA );
  3         3  
  3         143  
7             @ISA = qw( HTTP::Proxy::BodyFilter );
8 3     3   12 use Fcntl;
  3         3  
  3         738  
9 3     3   14 use File::Spec;
  3         4  
  3         60  
10 3     3   11 use File::Path;
  3         9  
  3         128  
11 3     3   19 use Carp;
  3         3  
  3         2672  
12              
13             sub init {
14 24     24 1 28 my $self = shift;
15              
16             # options
17 24         387 my %args = (
18             template => File::Spec->catfile( '%h', '%P' ),
19             no_host => 0,
20             no_dirs => 0,
21             cut_dirs => 0,
22             prefix => '',
23             filename => undef,
24             multiple => 1,
25             keep_old => 0, # no_clobber in wget parlance
26             timestamp => 0,
27             status => [ 200 ],
28             @_
29             );
30             # keep_old and timestamp can't be selected together
31 24 100 100     274 croak "Can't timestamp and keep older files at the same time"
32             if $args{keep_old} && $args{timestamp};
33 23 100       174 croak "status must be an array reference"
34             unless ref($args{status}) eq 'ARRAY';
35 24         233 croak "status must contain only HTTP codes"
36 22 100       23 if grep { !/^[12345]\d\d$/ } @{ $args{status} };
  22         43  
37 21 100 100     173 croak "filename must be a code reference"
38             if defined $args{filename} && !UNIVERSAL::isa( $args{filename}, 'CODE' );
39              
40 20         50 $self->{"_hpbf_save_filename_code"} = $args{filename};
41             $self->{"_hpbf_save_$_"} = $args{$_}
42 20         270 for qw( template no_host no_dirs cut_dirs prefix
43             multiple keep_old timestamp status );
44             }
45              
46             sub begin {
47 23     23 1 9961 my ( $self, $message ) = @_;
48              
49             # internal data initialisation
50 23         26 delete @{$self}{qw( _hpbf_save_filename _hpbf_save_fh )};
  23         38  
51              
52 23 100       126 my $uri = $message->isa( 'HTTP::Request' )
53             ? $message->uri : $message->request->uri;
54              
55             # save only the accepted status codes
56 23 100       212 if( $message->isa( 'HTTP::Response' ) ) {
57 4         13 my $code = $message->code;
58 4 100       31 return unless grep { $code eq $_ } @{ $self->{_hpbf_save_status} };
  5         28  
  4         11  
59             }
60            
61 22         18 my $file = '';
62 22 100       46 if( defined $self->{_hpbf_save_filename_code} ) {
63             # use the user-provided callback
64 4         8 $file = $self->{_hpbf_save_filename_code}->($message);
65 4 100 100     21 unless ( defined $file and $file ne '' ) {
66 2         6 $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save",
67             "Filter will not save $uri" );
68 2         5 return;
69             }
70             }
71             else {
72             # set the template variables from the URI
73 18         50 my @segs = $uri->path_segments; # starts with an empty string
74 18         541 shift @segs;
75 18 100       51 splice(@segs, 0, $self->{_hpbf_save_cut_dirs} >= @segs
76             ? @segs - 1 : $self->{_hpbf_save_cut_dirs} );
77 18 100 100     88 my %vars = (
    100 100        
78             '%' => '%',
79             h => $self->{_hpbf_save_no_host} ? '' : $uri->host,
80             f => $segs[-1] || 'index.html', # same default as wget
81             p => $self->{_hpbf_save_no_dirs} ? $segs[-1] || 'index.html'
82             : File::Spec->catfile(@segs),
83             q => $uri->query,
84             );
85 18         558 pop @segs;
86 18 100       77 $vars{d}
    100          
87             = $self->{_hpbf_save_no_dirs} ? ''
88             : @segs ? File::Spec->catfile(@segs)
89             : '';
90 18 100       46 $vars{P} = $vars{p} . ( $vars{q} ? "?$vars{q}" : '' );
91            
92             # create the filename
93 18   66     131 $file = File::Spec->catfile( $self->{_hpbf_save_prefix} || (),
94             $self->{_hpbf_save_template} );
95 18         677 $file =~ s/%(.)/$vars{$1}/g;
96             }
97 20         199 $file = File::Spec->rel2abs( $file );
98              
99             # create the directory
100 20         232 my $dir = File::Spec->catpath( (File::Spec->splitpath($file))[ 0, 1 ], '' );
101 20 100       397 if( ! -e $dir ) {
102 5         6 eval { mkpath( $dir ) };
  5         2068  
103 5 50       14 if ($@) {
104 0         0 $self->proxy->log( HTTP::Proxy::ERROR, "HTBF::save",
105             "Unable to create directory $dir" );
106 0         0 return;
107             }
108 5         18 $self->proxy->log( HTTP::Proxy::FILTERS, "HTBF::save",
109             "Created directory $dir" );
110             }
111              
112             # keep old file?
113 20 100       332 if ( -e $file ) {
114 3 50       12 if ( $self->{_hpbf_save_timestamp} ) {
    100          
115             # FIXME timestamp
116             }
117             elsif ( $self->{_hpbf_save_keep_old} ) {
118 1         4 $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save",
119             "Skip saving $uri" );
120 1         2 delete $self->{_hpbf_save_fh}; # it's a closed filehandle
121 1         3 return;
122             }
123             }
124              
125             # open and lock the file
126 19         31 my ( $ext, $n, $i ) = ( "", 0 );
127 19         17 my $flags = O_WRONLY | O_EXCL | O_CREAT;
128 19         1013 while( ! sysopen( $self->{_hpbf_save_fh}, "$file$ext", $flags ) ) {
129 2 50       7 $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save",
130             "Too many errors opening $file$ext" ), return
131             if $i++ - $n == 10; # should be ok now
132 2 100       6 if( $self->{_hpbf_save_multiple} ) {
133 1         27 $ext = "." . ++$n while -e $file.$ext;
134 1         44 next;
135             }
136             else {
137 1         22 $flags = O_WRONLY | O_CREAT;
138             }
139             }
140              
141             # we have an open filehandle
142 19         65 $self->{_hpbf_save_filename} = $file.$ext;
143 19         38 binmode( $self->{_hpbf_save_fh} ); # for Win32 and friends
144 19         66 $self->proxy->log( HTTP::Proxy::FILTERS, "HPBF::save",
145             "Saving $uri to $file$ext" );
146             }
147              
148             sub filter {
149 8     8 1 2611 my ( $self, $dataref ) = @_;
150 8 100       17 return unless exists $self->{_hpbf_save_fh};
151              
152             # save the data to the file
153 4         15 my $res = $self->{_hpbf_save_fh}->syswrite( $$dataref );
154 4 50       102 $self->proxy->log( HTTP::Proxy::ERROR, "HPBF::save", "syswrite() error: $!")
155             if ! defined $res; # FIXME error handling
156             }
157              
158             sub end {
159 4     4 1 11 my ($self) = @_;
160              
161             # close file
162 4 100       10 if( $self->{_hpbf_save_fh} ) {
163 2         9 $self->{_hpbf_save_fh}->close; # FIXME error handling
164 2         21 delete $self->{_hpbf_save_fh};
165             }
166             }
167              
168 2     2 1 9 sub will_modify { 0 }
169              
170             1;
171              
172             __END__