File Coverage

lib/File/Fetch.pm
Criterion Covered Total %
statement 299 522 57.2
branch 73 244 29.9
condition 19 69 27.5
subroutine 40 47 85.1
pod 4 4 100.0
total 435 886 49.1


line stmt bran cond sub pod time code
1             package File::Fetch;
2              
3 2     2   1694 use strict;
  2         4  
  2         70  
4 2     2   1080 use FileHandle;
  2         21636  
  2         10  
5 2     2   2368 use File::Temp;
  2         19678  
  2         137  
6 2     2   998 use File::Copy;
  2         4865  
  2         111  
7 2     2   12 use File::Spec;
  2         3  
  2         40  
8 2     2   10 use File::Spec::Unix;
  2         5  
  2         47  
9 2     2   8 use File::Basename qw[dirname];
  2         4  
  2         97  
10              
11 2     2   10 use Cwd qw[cwd];
  2         4  
  2         81  
12 2     2   11 use Carp qw[carp];
  2         3  
  2         88  
13 2     2   1350 use IPC::Cmd qw[can_run run QUOTE];
  2         103087  
  2         154  
14 2     2   16 use File::Path qw[mkpath];
  2         6  
  2         117  
15 2     2   13 use File::Temp qw[tempdir];
  2         3  
  2         126  
16 2     2   12 use Params::Check qw[check];
  2         20  
  2         83  
17 2     2   11 use Module::Load::Conditional qw[can_load];
  2         6  
  2         71  
18 2     2   11 use Locale::Maketext::Simple Style => 'gettext';
  2         2  
  2         9  
19              
20 2         528 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21             $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22             $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
23 2     2   490 ];
  2         39  
24              
25             $VERSION = '1.00';
26             $VERSION = eval $VERSION; # avoid warnings with development releases
27             $PREFER_BIN = 0; # XXX TODO implement
28             $FROM_EMAIL = 'File-Fetch@example.com';
29             $USER_AGENT = "File::Fetch/$VERSION";
30             $BLACKLIST = [qw|ftp|];
31             push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux';
32             $METHOD_FAIL = { };
33             $FTP_PASSIVE = 1;
34             $TIMEOUT = 0;
35             $DEBUG = 0;
36             $WARN = 1;
37             $FORCEIPV4 = 0;
38              
39             ### methods available to fetch the file depending on the scheme
40             $METHODS = {
41             http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
42             https => [ qw|lwp wget curl| ],
43             ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
44             file => [ qw|lwp lftp file| ],
45             rsync => [ qw|rsync| ],
46             git => [ qw|git| ],
47             };
48              
49             ### silly warnings ###
50             local $Params::Check::VERBOSE = 1;
51             local $Params::Check::VERBOSE = 1;
52             local $Module::Load::Conditional::VERBOSE = 0;
53             local $Module::Load::Conditional::VERBOSE = 0;
54              
55             ### see what OS we are on, important for file:// uris ###
56 2     2   14 use constant ON_WIN => ($^O eq 'MSWin32');
  2         2  
  2         145  
57 2     2   11 use constant ON_VMS => ($^O eq 'VMS');
  2         4  
  2         117  
58 2     2   12 use constant ON_UNIX => (!ON_WIN);
  2         3  
  2         119  
59 2     2   12 use constant HAS_VOL => (ON_WIN);
  2         4  
  2         105  
60 2     2   11 use constant HAS_SHARE => (ON_WIN);
  2         3  
  2         209  
61 2     2   14 use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
  2         3  
  2         321  
62              
63             =pod
64              
65             =head1 NAME
66              
67             File::Fetch - A generic file fetching mechanism
68              
69             =head1 SYNOPSIS
70              
71             use File::Fetch;
72              
73             ### build a File::Fetch object ###
74             my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
75              
76             ### fetch the uri to cwd() ###
77             my $where = $ff->fetch() or die $ff->error;
78              
79             ### fetch the uri to /tmp ###
80             my $where = $ff->fetch( to => '/tmp' );
81              
82             ### parsed bits from the uri ###
83             $ff->uri;
84             $ff->scheme;
85             $ff->host;
86             $ff->path;
87             $ff->file;
88              
89             =head1 DESCRIPTION
90              
91             File::Fetch is a generic file fetching mechanism.
92              
93             It allows you to fetch any file pointed to by a C, C,
94             C, C or C uri by a number of different means.
95              
96             See the C section further down for details.
97              
98             =head1 ACCESSORS
99              
100             A C object has the following accessors
101              
102             =over 4
103              
104             =item $ff->uri
105              
106             The uri you passed to the constructor
107              
108             =item $ff->scheme
109              
110             The scheme from the uri (like 'file', 'http', etc)
111              
112             =item $ff->host
113              
114             The hostname in the uri. Will be empty if host was originally
115             'localhost' for a 'file://' url.
116              
117             =item $ff->vol
118              
119             On operating systems with the concept of a volume the second element
120             of a file:// is considered to the be volume specification for the file.
121             Thus on Win32 this routine returns the volume, on other operating
122             systems this returns nothing.
123              
124             On Windows this value may be empty if the uri is to a network share, in
125             which case the 'share' property will be defined. Additionally, volume
126             specifications that use '|' as ':' will be converted on read to use ':'.
127              
128             On VMS, which has a volume concept, this field will be empty because VMS
129             file specifications are converted to absolute UNIX format and the volume
130             information is transparently included.
131              
132             =item $ff->share
133              
134             On systems with the concept of a network share (currently only Windows) returns
135             the sharename from a file://// url. On other operating systems returns empty.
136              
137             =item $ff->path
138              
139             The path from the uri, will be at least a single '/'.
140              
141             =item $ff->file
142              
143             The name of the remote file. For the local file name, the
144             result of $ff->output_file will be used.
145              
146             =item $ff->file_default
147              
148             The name of the default local file, that $ff->output_file falls back to if
149             it would otherwise return no filename. For example when fetching a URI like
150             http://www.abc.net.au/ the contents retrieved may be from a remote file called
151             'index.html'. The default value of this attribute is literally 'file_default'.
152              
153             =cut
154              
155              
156             ##########################
157             ### Object & Accessors ###
158             ##########################
159              
160             {
161             ### template for autogenerated accessors ###
162             my $Tmpl = {
163             scheme => { default => 'http' },
164             host => { default => 'localhost' },
165             path => { default => '/' },
166             file => { required => 1 },
167             uri => { required => 1 },
168             userinfo => { default => '' },
169             vol => { default => '' }, # windows for file:// uris
170             share => { default => '' }, # windows for file:// uris
171             file_default => { default => 'file_default' },
172             tempdir_root => { required => 1 }, # Should be lazy-set at ->new()
173             _error_msg => { no_override => 1 },
174             _error_msg_long => { no_override => 1 },
175             };
176              
177             for my $method ( keys %$Tmpl ) {
178 2     2   67 no strict 'refs';
  2         4  
  2         12225  
179             *$method = sub {
180 414     414   27787 my $self = shift;
181 414 50       1055 $self->{$method} = $_[0] if @_;
182 414         4969 return $self->{$method};
183             }
184             }
185              
186             sub _create {
187 44     44   625 my $class = shift;
188 44         960 my %hash = @_;
189              
190 44 50       1536 my $args = check( $Tmpl, \%hash ) or return;
191              
192 44         20548 bless $args, $class;
193              
194 44 50 66     722 if( lc($args->scheme) ne 'file' and not $args->host ) {
195 0         0 return $class->_error(loc(
196             "Hostname required when fetching from '%1'",$args->scheme));
197             }
198              
199 44         160 for (qw[path]) {
200 44 50       202 unless( $args->$_() ) { # 5.5.x needs the ()
201 0         0 return $class->_error(loc("No '%1' specified",$_));
202             }
203             }
204              
205 44         259 return $args;
206             }
207             }
208              
209             =item $ff->output_file
210              
211             The name of the output file. This is the same as $ff->file,
212             but any query parameters are stripped off. For example:
213              
214             http://example.com/index.html?x=y
215              
216             would make the output file be C rather than
217             C.
218              
219             =back
220              
221             =cut
222              
223             sub output_file {
224 94     94 1 44661 my $self = shift;
225 94         1099 my $file = $self->file;
226              
227 94         2171 $file =~ s/\?.*$//g;
228              
229 94   33     310 $file ||= $self->file_default;
230              
231 94         2279 return $file;
232             }
233              
234             ### XXX do this or just point to URI::Escape?
235             # =head2 $esc_uri = $ff->escaped_uri
236             #
237             # =cut
238             #
239             # ### most of this is stolen straight from URI::escape
240             # { ### Build a char->hex map
241             # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
242             #
243             # sub escaped_uri {
244             # my $self = shift;
245             # my $uri = $self->uri;
246             #
247             # ### Default unsafe characters. RFC 2732 ^(uric - reserved)
248             # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
249             # $escapes{$1} || $self->_fail_hi($1)/ge;
250             #
251             # return $uri;
252             # }
253             #
254             # sub _fail_hi {
255             # my $self = shift;
256             # my $char = shift;
257             #
258             # $self->_error(loc(
259             # "Can't escape '%1', try using the '%2' module instead",
260             # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
261             # ));
262             # }
263             #
264             # sub output_file {
265             #
266             # }
267             #
268             #
269             # }
270              
271             =head1 METHODS
272              
273             =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
274              
275             Parses the uri and creates a corresponding File::Fetch::Item object,
276             that is ready to be Ced and returns it.
277              
278             Returns false on failure.
279              
280             =cut
281              
282             sub new {
283 44     44 1 463555 my $class = shift;
284 44         330 my %hash = @_;
285              
286 44         184 my ($uri, $file_default, $tempdir_root);
287 44         498 my $tmpl = {
288             uri => { required => 1, store => \$uri },
289             file_default => { required => 0, store => \$file_default },
290             tempdir_root => { required => 0, store => \$tempdir_root },
291             };
292              
293 44 50       356 check( $tmpl, \%hash ) or return;
294              
295             ### parse the uri to usable parts ###
296 44 50       5937 my $href = $class->_parse_uri( $uri ) or return;
297              
298 44 50       139 $href->{file_default} = $file_default if $file_default;
299 44 50       108 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
300 44 50       159334 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root};
301              
302             ### make it into a FFI object ###
303 44 50       2472 my $ff = $class->_create( %$href ) or return;
304              
305              
306             ### return the object ###
307 44         1283 return $ff;
308             }
309              
310             ### parses an uri to a hash structure:
311             ###
312             ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
313             ###
314             ### becomes:
315             ###
316             ### $href = {
317             ### scheme => 'ftp',
318             ### host => 'ftp.cpan.org',
319             ### path => '/pub/mirror',
320             ### file => 'index.html'
321             ### };
322             ###
323             ### In the case of file:// urls there maybe be additional fields
324             ###
325             ### For systems with volume specifications such as Win32 there will be
326             ### a volume specifier provided in the 'vol' field.
327             ###
328             ### 'vol' => 'volumename'
329             ###
330             ### For windows file shares there may be a 'share' key specified
331             ###
332             ### 'share' => 'sharename'
333             ###
334             ### Note that the rules of what a file:// url means vary by the operating system
335             ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
336             ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
337             ### not '/foo/bar.txt'
338             ###
339             ### Similarly if the host interpreting the url is VMS then
340             ### file:///disk$user/my/notes/note12345.txt' means
341             ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
342             ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
343             ### Except for some cases in the File::Spec methods, Perl on VMS will generally
344             ### handle UNIX format file specifications.
345             ###
346             ### This means it is impossible to serve certain file:// urls on certain systems.
347             ###
348             ### Thus are the problems with a protocol-less specification. :-(
349             ###
350              
351             sub _parse_uri {
352 51     51   3021212 my $self = shift;
353 51 50       159 my $uri = shift or return;
354              
355 51         129 my $href = { uri => $uri };
356              
357             ### find the scheme ###
358 51         631 $uri =~ s|^(\w+)://||;
359 51         309 $href->{scheme} = $1;
360              
361             ### See rfc 1738 section 3.10
362             ### http://www.faqs.org/rfcs/rfc1738.html
363             ### And wikipedia for more on windows file:// urls
364             ### http://en.wikipedia.org/wiki/File://
365 51 100       204 if( $href->{scheme} eq 'file' ) {
366              
367 9         53 my @parts = split '/',$uri;
368              
369             ### file://hostname/...
370             ### file://hostname/...
371             ### normalize file://localhost with file:///
372 9   100     45 $href->{host} = $parts[0] || '';
373              
374             ### index in @parts where the path components begin;
375 9         17 my $index = 1;
376              
377             ### file:////hostname/sharename/blah.txt
378 9         10 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
379              
380             $href->{host} = $parts[2] || ''; # avoid warnings
381             $href->{share} = $parts[3] || ''; # avoid warnings
382              
383             $index = 4 # index after the share
384              
385             ### file:///D|/blah.txt
386             ### file:///D:/blah.txt
387 0         0 } elsif (HAS_VOL) {
388              
389             ### this code comes from dmq's patch, but:
390             ### XXX if volume is empty, wouldn't that be an error? --kane
391             ### if so, our file://localhost test needs to be fixed as wel
392             $href->{vol} = $parts[1] || '';
393              
394             ### correct D| style colume descriptors
395             $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
396              
397             $index = 2; # index after the volume
398             }
399              
400             ### rebuild the path from the leftover parts;
401 9         52 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
402              
403             } else {
404             ### using anything but qw() in hash slices may produce warnings
405             ### in older perls :-(
406 42         530 @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s;
  42         219  
407             }
408              
409             ### split the path into file + dir ###
410 51         103 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
  51         2161  
411 51         282 $href->{path} = $parts[1];
412 51         176 $href->{file} = $parts[2];
413             }
414              
415             ### host will be empty if the target was 'localhost' and the
416             ### scheme was 'file'
417             $href->{host} = '' if ($href->{host} eq 'localhost') and
418 51 100 100     265 ($href->{scheme} eq 'file');
419              
420 51         195 return $href;
421             }
422              
423             =head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
424              
425             Fetches the file you requested and returns the full path to the file.
426              
427             By default it writes to C, but you can override that by specifying
428             the C argument:
429              
430             ### file fetch to /tmp, full path to the file in $where
431             $where = $ff->fetch( to => '/tmp' );
432              
433             ### file slurped into $scalar, full path to the file in $where
434             ### file is downloaded to a temp directory and cleaned up at exit time
435             $where = $ff->fetch( to => \$scalar );
436              
437             Returns the full path to the downloaded file on success, and false
438             on failure.
439              
440             =cut
441              
442             sub fetch {
443 70 50   70 1 8771 my $self = shift or return;
444 70         612 my %hash = @_;
445              
446 70         158 my $target;
447 70         251741 my $tmpl = {
448             to => { default => cwd(), store => \$target },
449             };
450              
451 70 50       3196 check( $tmpl, \%hash ) or return;
452              
453 70         14102 my ($to, $fh);
454             ### you want us to slurp the contents
455 70 100 66     1333 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
456 35         709 $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
457              
458             ### plain old fetch
459             } else {
460 35         215 $to = $target;
461              
462             ### On VMS force to VMS format so File::Spec will work.
463 35         91 $to = VMS::Filespec::vmspath($to) if ON_VMS;
464              
465             ### create the path if it doesn't exist yet ###
466 35 100       908 unless( -d $to ) {
467 1         6 eval { mkpath( $to ) };
  1         344  
468              
469 1 50       12 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
470             }
471             }
472              
473             ### set passive ftp if required ###
474 70         30019 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
475              
476             ### we dont use catfile on win32 because if we are using a cygwin tool
477             ### under cmd.exe they wont understand windows style separators.
478 70         1367 my $out_to = ON_WIN ? $to.'/'.$self->output_file
479             : File::Spec->catfile( $to, $self->output_file );
480              
481 70         246 for my $method ( @{ $METHODS->{$self->scheme} } ) {
  70         548  
482 70         855 my $sub = '_'.$method.'_fetch';
483              
484 70 50       1270 unless( __PACKAGE__->can($sub) ) {
485 0         0 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
486             $method));
487 0         0 next;
488             }
489              
490             ### method is blacklisted ###
491 70 50       289 next if grep { lc $_ eq $method } @$BLACKLIST;
  70         362  
492              
493             ### method is known to fail ###
494 70 100       448 next if $METHOD_FAIL->{$method};
495              
496             ### there's serious issues with IPC::Run and quoting of command
497             ### line arguments. using quotes in the wrong place breaks things,
498             ### and in the case of say,
499             ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
500             ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
501             ### it doesn't matter how you quote, it always fails.
502 30         400 local $IPC::Cmd::USE_IPC_RUN = 0;
503              
504 30 100       417 if( my $file = $self->$sub(
505             to => $out_to
506             )){
507              
508 24 50 33     1759 unless( -e $file && -s _ ) {
509 0         0 $self->_error(loc("'%1' said it fetched '%2', ".
510             "but it was not created",$method,$file));
511              
512             ### mark the failure ###
513 0         0 $METHOD_FAIL->{$method} = 1;
514              
515 0         0 next;
516              
517             } else {
518              
519             ### slurp mode?
520 24 100 66     604 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
521              
522             ### open the file
523 12 50       1053 open my $fh, "<$file" or do {
524 0         0 $self->_error(
525             loc("Could not open '%1': %2", $file, $!));
526 0         0 return;
527             };
528              
529             ### slurp
530 12         72 $$target = do { local $/; <$fh> };
  12         132  
  12         736  
531              
532             }
533              
534 24         2697 my $abs = File::Spec->rel2abs( $file );
535 24         1405 return $abs;
536              
537             }
538             }
539             }
540              
541              
542             ### if we got here, we looped over all methods, but we weren't able
543             ### to fetch it.
544 46         931 return;
545             }
546              
547             ########################
548             ### _*_fetch methods ###
549             ########################
550              
551             ### LWP fetching ###
552             sub _lwp_fetch {
553 1     1   3 my $self = shift;
554 1         5 my %hash = @_;
555              
556 1         2 my ($to);
557 1         14 my $tmpl = {
558             to => { required => 1, store => \$to }
559             };
560 1 50       6 check( $tmpl, \%hash ) or return;
561              
562             ### modules required to download with lwp ###
563 1         95 my $use_list = {
564             LWP => '0.0',
565             'LWP::UserAgent' => '0.0',
566             'HTTP::Request' => '0.0',
567             'HTTP::Status' => '0.0',
568             URI => '0.0',
569              
570             };
571              
572 1 50       9 if ($self->scheme eq 'https') {
573 0         0 $use_list->{'LWP::Protocol::https'} = '0';
574             }
575              
576             ### Fix CVE-2016-1238 ###
577 1         12 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
578 1 50       21 unless( can_load( modules => $use_list ) ) {
579 1         766 $METHOD_FAIL->{'lwp'} = 1;
580 1         14 return;
581             }
582              
583             ### setup the uri object
584 0         0 my $uri = URI->new( File::Spec::Unix->catfile(
585             $self->path, $self->file
586             ) );
587              
588             ### special rules apply for file:// uris ###
589 0         0 $uri->scheme( $self->scheme );
590 0 0       0 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
591              
592 0 0       0 if ($self->userinfo) {
    0          
593 0         0 $uri->userinfo($self->userinfo);
594             } elsif ($self->scheme ne 'file') {
595 0         0 $uri->userinfo("anonymous:$FROM_EMAIL");
596             }
597              
598             ### set up the useragent object
599 0         0 my $ua = LWP::UserAgent->new();
600 0 0       0 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
601 0         0 $ua->agent( $USER_AGENT );
602 0         0 $ua->from( $FROM_EMAIL );
603 0         0 $ua->env_proxy;
604              
605 0 0       0 my $res = $ua->mirror($uri, $to) or return;
606              
607             ### uptodate or fetched ok ###
608 0 0 0     0 if ( $res->code == 304 or $res->code == 200 ) {
609 0         0 return $to;
610              
611             } else {
612 0         0 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
613             $res->code, HTTP::Status::status_message($res->code),
614             $res->status_line));
615             }
616              
617             }
618              
619             ### HTTP::Tiny fetching ###
620             sub _httptiny_fetch {
621 6     6   54 my $self = shift;
622 6         53 my %hash = @_;
623              
624 6         15 my ($to);
625 6         80 my $tmpl = {
626             to => { required => 1, store => \$to }
627             };
628 6 50       66 check( $tmpl, \%hash ) or return;
629              
630 6         728 my $use_list = {
631             'HTTP::Tiny' => '0.008',
632              
633             };
634              
635             ### Fix CVE-2016-1238 ###
636 6         79 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
637 6 50       153 unless( can_load(modules => $use_list) ) {
638 0         0 $METHOD_FAIL->{'httptiny'} = 1;
639 0         0 return;
640             }
641              
642 6         56687 my $uri = $self->uri;
643              
644 6 50       199 my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
645              
646 6         1473 my $rc = $http->mirror( $uri, $to );
647              
648 6 50       1142089 unless ( $rc->{success} ) {
649              
650             return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
651 0         0 $rc->{status}, $rc->{reason} ) );
652              
653             }
654              
655 6         1113 return $to;
656              
657             }
658              
659             ### HTTP::Lite fetching ###
660             sub _httplite_fetch {
661 1     1   18 my $self = shift;
662 1         14 my %hash = @_;
663              
664 1         8 my ($to);
665 1         18 my $tmpl = {
666             to => { required => 1, store => \$to }
667             };
668 1 50       17 check( $tmpl, \%hash ) or return;
669              
670             ### modules required to download with lwp ###
671 1         117 my $use_list = {
672             'HTTP::Lite' => '2.2',
673             'MIME::Base64' => '0',
674             };
675              
676             ### Fix CVE-2016-1238 ###
677 1         15 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
678 1 50       30 unless( can_load(modules => $use_list) ) {
679 1         4745 $METHOD_FAIL->{'httplite'} = 1;
680 1         13 return;
681             }
682              
683 0         0 my $uri = $self->uri;
684 0         0 my $retries = 0;
685              
686 0         0 RETRIES: while ( $retries++ < 5 ) {
687              
688 0         0 my $http = HTTP::Lite->new();
689             # Naughty naughty but there isn't any accessor/setter
690 0 0       0 $http->{timeout} = $TIMEOUT if $TIMEOUT;
691 0         0 $http->http11_mode(1);
692              
693 0 0       0 if ($self->userinfo) {
694 0         0 my $encoded = MIME::Base64::encode($self->userinfo, '');
695 0         0 $http->add_req_header("Authorization", "Basic $encoded");
696             }
697              
698 0         0 my $fh = FileHandle->new;
699              
700 0 0       0 unless ( $fh->open($to,'>') ) {
701 0         0 return $self->_error(loc(
702             "Could not open '%1' for writing: %2",$to,$!));
703             }
704              
705 0         0 $fh->autoflush(1);
706              
707 0         0 binmode $fh;
708              
709 0     0   0 my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
  0         0  
  0         0  
  0         0  
  0         0  
710              
711 0         0 close $fh;
712              
713 0 0 0     0 if ( $rc == 301 || $rc == 302 ) {
    0          
714 0         0 my $loc;
715 0         0 HEADERS: for ($http->headers_array) {
716 0 0       0 /Location: (\S+)/ and $loc = $1, last HEADERS;
717             }
718             #$loc or last; # Think we should squeal here.
719 0 0       0 if ($loc =~ m!^/!) {
720 0         0 $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
721 0         0 $uri .= $loc;
722             }
723             else {
724 0         0 $uri = $loc;
725             }
726 0         0 next RETRIES;
727             }
728             elsif ( $rc == 200 ) {
729 0         0 return $to;
730             }
731             else {
732 0         0 return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
733             $rc, $http->status_message));
734             }
735              
736             } # Loop for 5 retries.
737              
738 0         0 return $self->_error("Fetch failed! Gave up after 5 tries");
739              
740             }
741              
742             ### Simple IO::Socket::INET fetching ###
743             sub _iosock_fetch {
744 6     6   49 my $self = shift;
745 6         79 my %hash = @_;
746              
747 6         17 my ($to);
748 6         71 my $tmpl = {
749             to => { required => 1, store => \$to }
750             };
751 6 50       54 check( $tmpl, \%hash ) or return;
752              
753 6         621 my $use_list = {
754             'IO::Socket::INET' => '0.0',
755             'IO::Select' => '0.0',
756             };
757              
758             ### Fix CVE-2016-1238 ###
759 6         89 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
760 6 50       135 unless( can_load(modules => $use_list) ) {
761 0         0 $METHOD_FAIL->{'iosock'} = 1;
762 0         0 return;
763             }
764              
765 6 50       9188 my $sock = IO::Socket::INET->new(
766             PeerHost => $self->host,
767             ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
768             );
769              
770 6 50       596378 unless ( $sock ) {
771 0         0 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
772             }
773              
774 6         239 my $fh = FileHandle->new;
775              
776             # Check open()
777              
778 6 50       837 unless ( $fh->open($to,'>') ) {
779 0         0 return $self->_error(loc(
780             "Could not open '%1' for writing: %2",$to,$!));
781             }
782              
783 6         1990 $fh->autoflush(1);
784 6         609 binmode $fh;
785              
786 6         52 my $path = File::Spec::Unix->catfile( $self->path, $self->file );
787 6         131 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
788 6         157 $sock->send( $req );
789              
790 6         1469 my $select = IO::Select->new( $sock );
791              
792 6         1016 my $resp = '';
793 6         31 my $normal = 0;
794 6   50     164 while ( $select->can_read( $TIMEOUT || 60 ) ) {
795 12         522918 my $ret = $sock->sysread( $resp, 4096, length($resp) );
796 12 100 66     802 if ( !defined $ret or $ret == 0 ) {
797 6         61 $select->remove( $sock );
798 6         574 $normal++;
799             }
800             }
801 6         978 close $sock;
802              
803 6 50       43 unless ( $normal ) {
804 0   0     0 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
805             }
806              
807             # Check the "response"
808             # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
809 6         88 $resp =~ s/^(\x0d?\x0a)+//;
810             # Check it is an HTTP response
811 6 50       77 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
812 0         0 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
813             }
814              
815             # Check for OK
816 6         75 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
817 6 50       37 unless ( $code eq '200' ) {
818 0         0 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
819             }
820              
821             {
822 6         16 local $\;
  6         100  
823 6         727 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
824             }
825 6         182 close $fh;
826 6         262 return $to;
827             }
828              
829             ### Net::FTP fetching
830             sub _netftp_fetch {
831 0     0   0 my $self = shift;
832 0         0 my %hash = @_;
833              
834 0         0 my ($to);
835 0         0 my $tmpl = {
836             to => { required => 1, store => \$to }
837             };
838 0 0       0 check( $tmpl, \%hash ) or return;
839              
840             ### required modules ###
841 0         0 my $use_list = { 'Net::FTP' => 0 };
842              
843             ### Fix CVE-2016-1238 ###
844 0         0 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
845 0 0       0 unless( can_load( modules => $use_list ) ) {
846 0         0 $METHOD_FAIL->{'netftp'} = 1;
847 0         0 return;
848             }
849              
850             ### make connection ###
851 0         0 my $ftp;
852 0         0 my @options = ($self->host);
853 0 0       0 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
854 0 0       0 unless( $ftp = Net::FTP->new( @options ) ) {
855 0         0 return $self->_error(loc("Ftp creation failed: %1",$@));
856             }
857              
858             ### login ###
859 0 0       0 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
860 0         0 return $self->_error(loc("Could not login to '%1'",$self->host));
861             }
862              
863             ### set binary mode, just in case ###
864 0         0 $ftp->binary;
865              
866             ### create the remote path
867             ### remember remote paths are unix paths! [#11483]
868 0         0 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
869              
870             ### fetch the file ###
871 0         0 my $target;
872 0 0       0 unless( $target = $ftp->get( $remote, $to ) ) {
873 0         0 return $self->_error(loc("Could not fetch '%1' from '%2'",
874             $remote, $self->host));
875             }
876              
877             ### log out ###
878 0         0 $ftp->quit;
879              
880 0         0 return $target;
881              
882             }
883              
884             ### /bin/wget fetch ###
885             sub _wget_fetch {
886 8     8   70 my $self = shift;
887 8         88 my %hash = @_;
888              
889 8         75 my ($to);
890 8         133 my $tmpl = {
891             to => { required => 1, store => \$to }
892             };
893 8 50       52 check( $tmpl, \%hash ) or return;
894              
895 8         766 my $wget;
896             ### see if we have a wget binary ###
897 8 50       146 unless( $wget = can_run('wget') ) {
898 0         0 $METHOD_FAIL->{'wget'} = 1;
899 0         0 return;
900             }
901              
902             ### no verboseness, thanks ###
903 8         4559 my $cmd = [ $wget, '--quiet' ];
904              
905             ### if a timeout is set, add it ###
906 8 50       47 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
907              
908             ### run passive if specified ###
909 8 50       46 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
910              
911             ### set the output document, add the uri ###
912 8         49 push @$cmd, '--output-document', $to, $self->uri;
913              
914             ### with IPC::Cmd > 0.41, this is fixed in teh library,
915             ### and there's no need for special casing any more.
916             ### DO NOT quote things for IPC::Run, it breaks stuff.
917             # $IPC::Cmd::USE_IPC_RUN
918             # ? ($to, $self->uri)
919             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
920              
921             ### shell out ###
922 8         19 my $captured;
923 8 50       183 unless(run( command => $cmd,
924             buffer => \$captured,
925             verbose => $DEBUG
926             )) {
927             ### wget creates the output document always, even if the fetch
928             ### fails.. so unlink it in that case
929 0         0 1 while unlink $to;
930              
931 0   0     0 return $self->_error(loc( "Command failed: %1", $captured || '' ));
932             }
933              
934 8         1545655 return $to;
935             }
936              
937             ### /bin/lftp fetch ###
938             sub _lftp_fetch {
939 1     1   16 my $self = shift;
940 1         11 my %hash = @_;
941              
942 1         10 my ($to);
943 1         29 my $tmpl = {
944             to => { required => 1, store => \$to }
945             };
946 1 50       18 check( $tmpl, \%hash ) or return;
947              
948             ### see if we have a lftp binary ###
949 1         83 my $lftp;
950 1 50       31 unless( $lftp = can_run('lftp') ) {
951 1         117156 $METHOD_FAIL->{'lftp'} = 1;
952 1         14 return;
953             }
954              
955             ### no verboseness, thanks ###
956 0         0 my $cmd = [ $lftp, '-f' ];
957              
958 0         0 my $fh = File::Temp->new;
959              
960 0         0 my $str;
961              
962             ### if a timeout is set, add it ###
963 0 0       0 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
964              
965             ### run passive if specified ###
966 0 0       0 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
967              
968             ### set the output document, add the uri ###
969             ### quote the URI, because lftp supports certain shell
970             ### expansions, most notably & for backgrounding.
971             ### ' quote does nto work, must be "
972 0         0 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
973              
974 0 0       0 if( $DEBUG ) {
975 0         0 my $pp_str = join ' ', split $/, $str;
976 0         0 print "# lftp command: $pp_str\n";
977             }
978              
979             ### write straight to the file.
980 0         0 $fh->autoflush(1);
981 0         0 print $fh $str;
982              
983             ### the command needs to be 1 string to be executed
984 0         0 push @$cmd, $fh->filename;
985              
986             ### with IPC::Cmd > 0.41, this is fixed in teh library,
987             ### and there's no need for special casing any more.
988             ### DO NOT quote things for IPC::Run, it breaks stuff.
989             # $IPC::Cmd::USE_IPC_RUN
990             # ? ($to, $self->uri)
991             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
992              
993              
994             ### shell out ###
995 0         0 my $captured;
996 0 0       0 unless(run( command => $cmd,
997             buffer => \$captured,
998             verbose => $DEBUG
999             )) {
1000             ### wget creates the output document always, even if the fetch
1001             ### fails.. so unlink it in that case
1002 0         0 1 while unlink $to;
1003              
1004 0   0     0 return $self->_error(loc( "Command failed: %1", $captured || '' ));
1005             }
1006              
1007 0         0 return $to;
1008             }
1009              
1010              
1011              
1012             ### /bin/ftp fetch ###
1013             sub _ftp_fetch {
1014 0     0   0 my $self = shift;
1015 0         0 my %hash = @_;
1016              
1017 0         0 my ($to);
1018 0         0 my $tmpl = {
1019             to => { required => 1, store => \$to }
1020             };
1021 0 0       0 check( $tmpl, \%hash ) or return;
1022              
1023             ### see if we have a ftp binary ###
1024 0         0 my $ftp;
1025 0 0       0 unless( $ftp = can_run('ftp') ) {
1026 0         0 $METHOD_FAIL->{'ftp'} = 1;
1027 0         0 return;
1028             }
1029              
1030 0         0 my $fh = FileHandle->new;
1031              
1032 0         0 local $SIG{CHLD} = 'IGNORE';
1033              
1034 0 0       0 unless ($fh->open("$ftp -n", '|-')) {
1035 0         0 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1036             }
1037              
1038 0         0 my @dialog = (
1039             "lcd " . dirname($to),
1040             "open " . $self->host,
1041             "user anonymous $FROM_EMAIL",
1042             "cd /",
1043             "cd " . $self->path,
1044             "binary",
1045             "get " . $self->file . " " . $self->output_file,
1046             "quit",
1047             );
1048              
1049 0         0 foreach (@dialog) { $fh->print($_, "\n") }
  0         0  
1050 0 0       0 $fh->close or return;
1051              
1052 0         0 return $to;
1053             }
1054              
1055             ### lynx is stupid - it decompresses any .gz file it finds to be text
1056             ### use /bin/lynx to fetch files
1057             sub _lynx_fetch {
1058 1     1   16 my $self = shift;
1059 1         9 my %hash = @_;
1060              
1061 1         11 my ($to);
1062 1         14 my $tmpl = {
1063             to => { required => 1, store => \$to }
1064             };
1065 1 50       17 check( $tmpl, \%hash ) or return;
1066              
1067             ### see if we have a lynx binary ###
1068 1         108 my $lynx;
1069 1 50       20 unless ( $lynx = can_run('lynx') ){
1070 1         638 $METHOD_FAIL->{'lynx'} = 1;
1071 1         18 return;
1072             }
1073              
1074 0 0       0 unless( IPC::Cmd->can_capture_buffer ) {
1075 0         0 $METHOD_FAIL->{'lynx'} = 1;
1076              
1077 0         0 return $self->_error(loc(
1078             "Can not capture buffers. Can not use '%1' to fetch files",
1079             'lynx' ));
1080             }
1081              
1082             ### check if the HTTP resource exists ###
1083 0 0       0 if ($self->uri =~ /^https?:\/\//i) {
1084 0         0 my $cmd = [
1085             $lynx,
1086             '-head',
1087             '-source',
1088             "-auth=anonymous:$FROM_EMAIL",
1089             ];
1090              
1091 0 0       0 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1092              
1093 0         0 push @$cmd, $self->uri;
1094              
1095             ### shell out ###
1096 0         0 my $head;
1097 0 0       0 unless(run( command => $cmd,
1098             buffer => \$head,
1099             verbose => $DEBUG )
1100             ) {
1101 0   0     0 return $self->_error(loc("Command failed: %1", $head || ''));
1102             }
1103              
1104 0 0       0 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1105 0   0     0 return $self->_error(loc("Command failed: %1", $head || ''));
1106             }
1107             }
1108              
1109             ### write to the output file ourselves, since lynx ass_u_mes to much
1110 0 0       0 my $local = FileHandle->new( $to, 'w' )
1111             or return $self->_error(loc(
1112             "Could not open '%1' for writing: %2",$to,$!));
1113              
1114             ### dump to stdout ###
1115 0         0 my $cmd = [
1116             $lynx,
1117             '-source',
1118             "-auth=anonymous:$FROM_EMAIL",
1119             ];
1120              
1121 0 0       0 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1122              
1123             ### DO NOT quote things for IPC::Run, it breaks stuff.
1124 0         0 push @$cmd, $self->uri;
1125              
1126             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1127             ### and there's no need for special casing any more.
1128             ### DO NOT quote things for IPC::Run, it breaks stuff.
1129             # $IPC::Cmd::USE_IPC_RUN
1130             # ? $self->uri
1131             # : QUOTE. $self->uri .QUOTE;
1132              
1133              
1134             ### shell out ###
1135 0         0 my $captured;
1136 0 0       0 unless(run( command => $cmd,
1137             buffer => \$captured,
1138             verbose => $DEBUG )
1139             ) {
1140 0   0     0 return $self->_error(loc("Command failed: %1", $captured || ''));
1141             }
1142              
1143             ### print to local file ###
1144             ### XXX on a 404 with a special error page, $captured will actually
1145             ### hold the contents of that page, and make it *appear* like the
1146             ### request was a success, when really it wasn't :(
1147             ### there doesn't seem to be an option for lynx to change the exit
1148             ### code based on a 4XX status or so.
1149             ### the closest we can come is using --error_file and parsing that,
1150             ### which is very unreliable ;(
1151 0         0 $local->print( $captured );
1152 0 0       0 $local->close or return;
1153              
1154 0         0 return $to;
1155             }
1156              
1157             ### use /bin/ncftp to fetch files
1158             sub _ncftp_fetch {
1159 0     0   0 my $self = shift;
1160 0         0 my %hash = @_;
1161              
1162 0         0 my ($to);
1163 0         0 my $tmpl = {
1164             to => { required => 1, store => \$to }
1165             };
1166 0 0       0 check( $tmpl, \%hash ) or return;
1167              
1168             ### we can only set passive mode in interactive sessions, so bail out
1169             ### if $FTP_PASSIVE is set
1170 0 0       0 return if $FTP_PASSIVE;
1171              
1172             ### see if we have a ncftp binary ###
1173 0         0 my $ncftp;
1174 0 0       0 unless( $ncftp = can_run('ncftp') ) {
1175 0         0 $METHOD_FAIL->{'ncftp'} = 1;
1176 0         0 return;
1177             }
1178              
1179 0 0       0 my $cmd = [
1180             $ncftp,
1181             '-V', # do not be verbose
1182             '-p', $FROM_EMAIL, # email as password
1183             $self->host, # hostname
1184             dirname($to), # local dir for the file
1185             # remote path to the file
1186             ### DO NOT quote things for IPC::Run, it breaks stuff.
1187             $IPC::Cmd::USE_IPC_RUN
1188             ? File::Spec::Unix->catdir( $self->path, $self->file )
1189             : QUOTE. File::Spec::Unix->catdir(
1190             $self->path, $self->file ) .QUOTE
1191              
1192             ];
1193              
1194             ### shell out ###
1195 0         0 my $captured;
1196 0 0       0 unless(run( command => $cmd,
1197             buffer => \$captured,
1198             verbose => $DEBUG )
1199             ) {
1200 0   0     0 return $self->_error(loc("Command failed: %1", $captured || ''));
1201             }
1202              
1203 0         0 return $to;
1204              
1205             }
1206              
1207             ### use /bin/curl to fetch files
1208             sub _curl_fetch {
1209 1     1   13 my $self = shift;
1210 1         23 my %hash = @_;
1211              
1212 1         10 my ($to);
1213 1         22 my $tmpl = {
1214             to => { required => 1, store => \$to }
1215             };
1216 1 50       22 check( $tmpl, \%hash ) or return;
1217 1         98 my $curl;
1218 1 50       24 unless ( $curl = can_run('curl') ) {
1219 1         621 $METHOD_FAIL->{'curl'} = 1;
1220 1         18 return;
1221             }
1222              
1223             ### these long opts are self explanatory - I like that -jmb
1224 0         0 my $cmd = [ $curl, '-q' ];
1225              
1226 0 0 0     0 push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
1227              
1228 0 0       0 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1229              
1230 0 0       0 push(@$cmd, '--silent') unless $DEBUG;
1231              
1232             ### curl does the right thing with passive, regardless ###
1233 0 0       0 if ($self->scheme eq 'ftp') {
1234 0         0 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1235             }
1236              
1237             ### curl doesn't follow 302 (temporarily moved) etc automatically
1238             ### so we add --location to enable that.
1239 0         0 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1240              
1241             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1242             ### and there's no need for special casing any more.
1243             ### DO NOT quote things for IPC::Run, it breaks stuff.
1244             # $IPC::Cmd::USE_IPC_RUN
1245             # ? ($to, $self->uri)
1246             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1247              
1248              
1249 0         0 my $captured;
1250 0 0       0 unless(run( command => $cmd,
1251             buffer => \$captured,
1252             verbose => $DEBUG )
1253             ) {
1254              
1255 0   0     0 return $self->_error(loc("Command failed: %1", $captured || ''));
1256             }
1257              
1258 0         0 return $to;
1259              
1260             }
1261              
1262             ### /usr/bin/fetch fetch! ###
1263             sub _fetch_fetch {
1264 1     1   14 my $self = shift;
1265 1         12 my %hash = @_;
1266              
1267 1         10 my ($to);
1268 1         15 my $tmpl = {
1269             to => { required => 1, store => \$to }
1270             };
1271 1 50       21 check( $tmpl, \%hash ) or return;
1272              
1273             ### see if we have a fetch binary ###
1274 1         88 my $fetch;
1275 1 50 33     32 unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1276 1         7 $METHOD_FAIL->{'fetch'} = 1;
1277 1         13 return;
1278             }
1279              
1280             ### no verboseness, thanks ###
1281 0         0 my $cmd = [ $fetch, '-q' ];
1282              
1283             ### if a timeout is set, add it ###
1284 0 0       0 push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1285              
1286             ### run passive if specified ###
1287             #push @$cmd, '-p' if $FTP_PASSIVE;
1288 0 0       0 local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1289              
1290             ### set the output document, add the uri ###
1291 0         0 push @$cmd, '-o', $to, $self->uri;
1292              
1293             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1294             ### and there's no need for special casing any more.
1295             ### DO NOT quote things for IPC::Run, it breaks stuff.
1296             # $IPC::Cmd::USE_IPC_RUN
1297             # ? ($to, $self->uri)
1298             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1299              
1300             ### shell out ###
1301 0         0 my $captured;
1302 0 0       0 unless(run( command => $cmd,
1303             buffer => \$captured,
1304             verbose => $DEBUG
1305             )) {
1306             ### wget creates the output document always, even if the fetch
1307             ### fails.. so unlink it in that case
1308 0         0 1 while unlink $to;
1309              
1310 0   0     0 return $self->_error(loc( "Command failed: %1", $captured || '' ));
1311             }
1312              
1313 0         0 return $to;
1314             }
1315              
1316             ### use File::Copy for fetching file:// urls ###
1317             ###
1318             ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1319             ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1320             ###
1321              
1322             sub _file_fetch {
1323 2     2   18 my $self = shift;
1324 2         25 my %hash = @_;
1325              
1326 2         10 my ($to);
1327 2         24 my $tmpl = {
1328             to => { required => 1, store => \$to }
1329             };
1330 2 50       29 check( $tmpl, \%hash ) or return;
1331              
1332              
1333              
1334             ### prefix a / on unix systems with a file uri, since it would
1335             ### look somewhat like this:
1336             ### file:///home/kane/file
1337             ### whereas windows file uris for 'c:\some\dir\file' might look like:
1338             ### file:///C:/some/dir/file
1339             ### file:///C|/some/dir/file
1340             ### or for a network share '\\host\share\some\dir\file':
1341             ### file:////host/share/some/dir/file
1342             ###
1343             ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1344             ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1345             ###
1346              
1347 2         186 my $path = $self->path;
1348 2         19 my $vol = $self->vol;
1349 2         26 my $share = $self->share;
1350              
1351 2         8 my $remote;
1352 2 50 33     37 if (!$share and $self->host) {
1353 0         0 return $self->_error(loc(
1354             "Currently %1 cannot handle hosts in %2 urls",
1355             'File::Fetch', 'file://'
1356             ));
1357             }
1358              
1359 2 50       51 if( $vol ) {
    50          
1360 0         0 $path = File::Spec->catdir( split /\//, $path );
1361 0         0 $remote = File::Spec->catpath( $vol, $path, $self->file);
1362              
1363             } elsif( $share ) {
1364             ### win32 specific, and a share name, so we wont bother with File::Spec
1365 0         0 $path =~ s|/+|\\|g;
1366 0         0 $remote = "\\\\".$self->host."\\$share\\$path";
1367              
1368             } else {
1369             ### File::Spec on VMS can not currently handle UNIX syntax.
1370 2         15 my $file_class = ON_VMS
1371             ? 'File::Spec::Unix'
1372             : 'File::Spec';
1373              
1374 2         21 $remote = $file_class->catfile( $path, $self->file );
1375             }
1376              
1377             ### File::Copy is littered with 'die' statements :( ###
1378 2         21 my $rv = eval { File::Copy::copy( $remote, $to ) };
  2         53  
1379              
1380             ### something went wrong ###
1381 2 50 33     1203 if( !$rv or $@ ) {
1382 0         0 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1383             $remote, $to, $!, $@));
1384             }
1385              
1386 2         16 return $to;
1387             }
1388              
1389             ### use /usr/bin/rsync to fetch files
1390             sub _rsync_fetch {
1391 0     0   0 my $self = shift;
1392 0         0 my %hash = @_;
1393              
1394 0         0 my ($to);
1395 0         0 my $tmpl = {
1396             to => { required => 1, store => \$to }
1397             };
1398 0 0       0 check( $tmpl, \%hash ) or return;
1399 0         0 my $rsync;
1400 0 0       0 unless ( $rsync = can_run('rsync') ) {
1401 0         0 $METHOD_FAIL->{'rsync'} = 1;
1402 0         0 return;
1403             }
1404              
1405 0         0 my $cmd = [ $rsync ];
1406              
1407             ### XXX: rsync has no I/O timeouts at all, by default
1408 0 0       0 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1409              
1410 0 0       0 push(@$cmd, '--quiet') unless $DEBUG;
1411              
1412             ### DO NOT quote things for IPC::Run, it breaks stuff.
1413 0         0 push @$cmd, $self->uri, $to;
1414              
1415             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1416             ### and there's no need for special casing any more.
1417             ### DO NOT quote things for IPC::Run, it breaks stuff.
1418             # $IPC::Cmd::USE_IPC_RUN
1419             # ? ($to, $self->uri)
1420             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1421              
1422 0         0 my $captured;
1423 0 0       0 unless(run( command => $cmd,
1424             buffer => \$captured,
1425             verbose => $DEBUG )
1426             ) {
1427              
1428 0   0     0 return $self->_error(loc("Command %1 failed: %2",
      0        
1429             "@$cmd" || '', $captured || ''));
1430             }
1431              
1432 0         0 return $to;
1433              
1434             }
1435              
1436             ### use git to fetch files
1437             sub _git_fetch {
1438 2     2   19 my $self = shift;
1439 2         17 my %hash = @_;
1440              
1441 2         9 my ($to);
1442 2         25 my $tmpl = {
1443             to => { required => 1, store => \$to }
1444             };
1445 2 50       22 check( $tmpl, \%hash ) or return;
1446 2         177 my $git;
1447 2 50       41 unless ( $git = can_run('git') ) {
1448 0         0 $METHOD_FAIL->{'git'} = 1;
1449 0         0 return;
1450             }
1451              
1452 2         1044 my $cmd = [ $git, 'clone' ];
1453              
1454             #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1455              
1456 2 50       19 push(@$cmd, '--quiet') unless $DEBUG;
1457              
1458             ### DO NOT quote things for IPC::Run, it breaks stuff.
1459 2         20 push @$cmd, $self->uri, $to;
1460              
1461             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1462             ### and there's no need for special casing any more.
1463             ### DO NOT quote things for IPC::Run, it breaks stuff.
1464             # $IPC::Cmd::USE_IPC_RUN
1465             # ? ($to, $self->uri)
1466             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1467              
1468 2         10 my $captured;
1469 2 50       169 unless(run( command => $cmd,
1470             buffer => \$captured,
1471             verbose => $DEBUG )
1472             ) {
1473              
1474 0   0     0 return $self->_error(loc("Command %1 failed: %2",
      0        
1475             "@$cmd" || '', $captured || ''));
1476             }
1477              
1478 2         1465375 return $to;
1479              
1480             }
1481              
1482             #################################
1483             #
1484             # Error code
1485             #
1486             #################################
1487              
1488             =pod
1489              
1490             =head2 $ff->error([BOOL])
1491              
1492             Returns the last encountered error as string.
1493             Pass it a true value to get the C output instead.
1494              
1495             =cut
1496              
1497             ### error handling the way Archive::Extract does it
1498             sub _error {
1499 0     0     my $self = shift;
1500 0           my $error = shift;
1501              
1502 0           $self->_error_msg( $error );
1503 0           $self->_error_msg_long( Carp::longmess($error) );
1504              
1505 0 0         if( $WARN ) {
1506 0 0         carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1507             }
1508              
1509 0           return;
1510             }
1511              
1512             sub error {
1513 0     0 1   my $self = shift;
1514 0 0         return shift() ? $self->_error_msg_long : $self->_error_msg;
1515             }
1516              
1517              
1518             1;
1519              
1520             =pod
1521              
1522             =head1 HOW IT WORKS
1523              
1524             File::Fetch is able to fetch a variety of uris, by using several
1525             external programs and modules.
1526              
1527             Below is a mapping of what utilities will be used in what order
1528             for what schemes, if available:
1529              
1530             file => LWP, lftp, file
1531             http => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock
1532             ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1533             rsync => rsync
1534             git => git
1535              
1536             If you'd like to disable the use of one or more of these utilities
1537             and/or modules, see the C<$BLACKLIST> variable further down.
1538              
1539             If a utility or module isn't available, it will be marked in a cache
1540             (see the C<$METHOD_FAIL> variable further down), so it will not be
1541             tried again. The C method will only fail when all options are
1542             exhausted, and it was not able to retrieve the file.
1543              
1544             The C utility is available on FreeBSD. NetBSD and Dragonfly BSD
1545             may also have it from C. We only check for C on those
1546             three platforms.
1547              
1548             C is a very limited L based mechanism for
1549             retrieving C schemed urls. It doesn't follow redirects for instance.
1550              
1551             C only supports C style urls.
1552              
1553             A special note about fetching files from an ftp uri:
1554              
1555             By default, all ftp connections are done in passive mode. To change
1556             that, see the C<$FTP_PASSIVE> variable further down.
1557              
1558             Furthermore, ftp uris only support anonymous connections, so no
1559             named user/password pair can be passed along.
1560              
1561             C is blacklisted by default; see the C<$BLACKLIST> variable
1562             further down.
1563              
1564             =head1 GLOBAL VARIABLES
1565              
1566             The behaviour of File::Fetch can be altered by changing the following
1567             global variables:
1568              
1569             =head2 $File::Fetch::FROM_EMAIL
1570              
1571             This is the email address that will be sent as your anonymous ftp
1572             password.
1573              
1574             Default is C.
1575              
1576             =head2 $File::Fetch::USER_AGENT
1577              
1578             This is the useragent as C will report it.
1579              
1580             Default is C.
1581              
1582             =head2 $File::Fetch::FTP_PASSIVE
1583              
1584             This variable controls whether the environment variable C
1585             and any passive switches to commandline tools will be set to true.
1586              
1587             Default value is 1.
1588              
1589             Note: When $FTP_PASSIVE is true, C will not be used to fetch
1590             files, since passive mode can only be set interactively for this binary
1591              
1592             =head2 $File::Fetch::TIMEOUT
1593              
1594             When set, controls the network timeout (counted in seconds).
1595              
1596             Default value is 0.
1597              
1598             =head2 $File::Fetch::WARN
1599              
1600             This variable controls whether errors encountered internally by
1601             C should be C'd or not.
1602              
1603             Set to false to silence warnings. Inspect the output of the C
1604             method manually to see what went wrong.
1605              
1606             Defaults to C.
1607              
1608             =head2 $File::Fetch::DEBUG
1609              
1610             This enables debugging output when calling commandline utilities to
1611             fetch files.
1612             This also enables C errors, instead of the regular
1613             C errors.
1614              
1615             Good for tracking down why things don't work with your particular
1616             setup.
1617              
1618             Default is 0.
1619              
1620             =head2 $File::Fetch::BLACKLIST
1621              
1622             This is an array ref holding blacklisted modules/utilities for fetching
1623             files with.
1624              
1625             To disallow the use of, for example, C and C, you could
1626             set $File::Fetch::BLACKLIST to:
1627              
1628             $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1629              
1630             The default blacklist is [qw|ftp|], as C is rather unreliable.
1631              
1632             See the note on C below.
1633              
1634             =head2 $File::Fetch::METHOD_FAIL
1635              
1636             This is a hashref registering what modules/utilities were known to fail
1637             for fetching files (mostly because they weren't installed).
1638              
1639             You can reset this cache by assigning an empty hashref to it, or
1640             individually remove keys.
1641              
1642             See the note on C below.
1643              
1644             =head1 MAPPING
1645              
1646              
1647             Here's a quick mapping for the utilities/modules, and their names for
1648             the $BLACKLIST, $METHOD_FAIL and other internal functions.
1649              
1650             LWP => lwp
1651             HTTP::Lite => httplite
1652             HTTP::Tiny => httptiny
1653             Net::FTP => netftp
1654             wget => wget
1655             lynx => lynx
1656             ncftp => ncftp
1657             ftp => ftp
1658             curl => curl
1659             rsync => rsync
1660             lftp => lftp
1661             fetch => fetch
1662             IO::Socket => iosock
1663              
1664             =head1 FREQUENTLY ASKED QUESTIONS
1665              
1666             =head2 So how do I use a proxy with File::Fetch?
1667              
1668             C currently only supports proxies with LWP::UserAgent.
1669             You will need to set your environment variables accordingly. For
1670             example, to use an ftp proxy:
1671              
1672             $ENV{ftp_proxy} = 'foo.com';
1673              
1674             Refer to the LWP::UserAgent manpage for more details.
1675              
1676             =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1677              
1678             C can only fetch remote files by dumping its contents to C,
1679             which we in turn capture. If that content is a 'custom' error file
1680             (like, say, a C<404 handler>), you will get that contents instead.
1681              
1682             Sadly, C doesn't support any options to return a different exit
1683             code on non-C<200 OK> status, giving us no way to tell the difference
1684             between a 'successful' fetch and a custom error page.
1685              
1686             Therefor, we recommend to only use C as a last resort. This is
1687             why it is at the back of our list of methods to try as well.
1688              
1689             =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1690              
1691             C is relatively smart about things. When trying to write
1692             a file to disk, it removes the C (see the
1693             C method for details) from the file name before creating
1694             it. In most cases this suffices.
1695              
1696             If you have any other characters you need to escape, please install
1697             the C module from CPAN, and pre-encode your URI before
1698             passing it to C. You can read about the details of URIs
1699             and URI encoding here:
1700              
1701             http://www.faqs.org/rfcs/rfc2396.html
1702              
1703             =head1 TODO
1704              
1705             =over 4
1706              
1707             =item Implement $PREFER_BIN
1708              
1709             To indicate to rather use commandline tools than modules
1710              
1711             =back
1712              
1713             =head1 BUG REPORTS
1714              
1715             Please report bugs or other issues to Ebug-file-fetch@rt.cpan.org.
1716              
1717             =head1 AUTHOR
1718              
1719             This module by Jos Boumans Ekane@cpan.orgE.
1720              
1721             =head1 COPYRIGHT
1722              
1723             This library is free software; you may redistribute and/or modify it
1724             under the same terms as Perl itself.
1725              
1726              
1727             =cut
1728              
1729             # Local variables:
1730             # c-indentation-style: bsd
1731             # c-basic-offset: 4
1732             # indent-tabs-mode: nil
1733             # End:
1734             # vim: expandtab shiftwidth=4:
1735              
1736              
1737              
1738