File Coverage

blib/lib/LWP/Protocol/rsync.pm
Criterion Covered Total %
statement 99 182 54.4
branch 28 102 27.4
condition 6 33 18.1
subroutine 16 20 80.0
pod 1 1 100.0
total 150 338 44.3


line stmt bran cond sub pod time code
1             # Copyright 2012, 2013, 2014, 2018, 2019 Kevin Ryde
2              
3             # This file is part of LWP-Protocol-rsync.
4             #
5             # LWP-Protocol-rsync is free software; you can redistribute it and/or modify it
6             # under the terms of the GNU General Public License as published by the Free
7             # Software Foundation; either version 3, or (at your option) any later
8             # version.
9             #
10             # LWP-Protocol-rsync is distributed in the hope that it will be useful, but
11             # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12             # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13             # for more details.
14             #
15             # You should have received a copy of the GNU General Public License along
16             # with LWP-Protocol-rsync. If not, see .
17              
18              
19             # RFC 5781 rsync schema
20             # RFC 3986 URI general
21             # RFC 2518 WebDAV
22             # RFC 2068 HTTP 1.1
23             # - reason phrase in status line can be changed without affecting protocol
24             # RFC 2616 HTTP 1.1
25             #
26             # cf LWP::Protocol::ftp
27             # LWP::Protocol::http
28             # LWP::Protocol::file
29              
30              
31             package LWP::Protocol::rsync;
32 3     3   138003 use strict;
  3         14  
  3         84  
33 3     3   57 use 5.005; # for \z
  3         12  
34 3     3   15 use File::Spec;
  3         5  
  3         86  
35 3     3   531 use HTTP::Date ();
  3         4535  
  3         88  
36 3     3   455 use HTTP::Response;
  3         31497  
  3         93  
37 3     3   25 use HTTP::Status ();
  3         14  
  3         108  
38 3     3   2814 use IPC::Run;
  3         124121  
  3         138  
39 3     3   1461 use LWP::MediaTypes ();
  3         49916  
  3         105  
40 3     3   28 use URI::Escape ();
  3         7  
  3         75  
41              
42 3     3   16 use vars '$VERSION','@ISA';
  3         8  
  3         177  
43             $VERSION = 2;
44              
45 3     3   482 use LWP::Protocol;
  3         4825  
  3         6423  
46             @ISA = ('LWP::Protocol');
47              
48             # uncomment this to run the ### lines
49             # use Smart::Comments;
50              
51              
52             # $arg is
53             # undef -- use HTTP::Request / HTTP::Response ->content()
54             # scalar -- filename
55             # coderef -- func to call with data blocks of $size bytes
56             #
57             sub request {
58 2     2 1 348 my($self, $request, $proxy, $arg, $size) = @_;
59 2 50 33     21 $size = 4096 unless defined $size and $size > 0;
60              
61 2 50       7 if (defined $proxy) {
62 0         0 return HTTP::Response->new(HTTP::Status::RC_BAD_REQUEST(),
63             'No proxy support for rsync');
64             }
65              
66             # URI::rsync documented in URI.pm
67 2         20 my $uri = $request->uri;
68             ### $uri
69              
70 2         36 my $scheme = $uri->scheme;
71 2 50       57 if ($scheme ne 'rsync') {
72 0         0 return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR(),
73             "URI scheme not rsync");
74             }
75              
76 2         29 my $hostport = $uri->host_port; # eg "localhost:9999"
77 2         207 my $user = $uri->user; # or undef if no "fred@" part present
78 2         104 my $password = $uri->password;
79 2         67 my $path = $uri->path;
80             ### $path
81              
82 2         37 foreach ($hostport, $user, $password, $path) {
83 8 100       99 if (defined $_) {
84 4         19 $_ = URI::Escape::uri_unescape($_); # mutate to undo %20 etc
85             }
86             }
87             ### path unescaped: $path
88              
89             # wildcards bad in $path (but ok in $hostport as "[::1]" etc IPv6)
90 2 50       21 if ($path =~ /[*?[]/) {
91 0         0 return HTTP::Response->new(HTTP::Status::HTTP_NOT_IMPLEMENTED(),
92             "Characters * ? [ not allowed in path");
93             }
94              
95 2 50       11 my $uri_str = "rsync://".(defined $user ? "$user@" : "").$hostport.$path;
96             ### $uri_str
97              
98 2         20 my $method = $request->method;
99             ### $method
100              
101 2   33     49 my $dir_listing = ($uri_str =~ m{/\z}
102             || _path_is_modules_or_root($path));
103 2         4 my $content_type;
104 2 50       7 if ($dir_listing) {
105 0         0 $content_type = 'text/plain';
106             }
107             ### $dir_listing
108             ### $content_type
109              
110 2 100       7 if ($method eq 'HEAD') {
111             # With --no-dirs a directory sets $listing to either
112             # skipping directory foo # no trailing /
113             # skipping directory . # if trailing /
114             # Without --no-dirs and without trailing slash is
115             # drwxrwxrwt 69,632 2014/03/26 19:18:01 tmp
116             #
117             # For the root directory "rsync://hostname/module/" or
118             # "rsync://hostname/module" both give the full listing of the root
119             # directory, not just the root "/" itself. Use --quiet to suppress that
120             # so as to check just the existence. Likewise the module listing is
121             # suppressed with --quiet.
122              
123 1         5 $uri_str =~ s{/+\z}{}; # strip trailing slashes
124             ### uri strip trailing slashes: $uri_str
125              
126 1         1 my $listing;
127 1 50       3 if (my $resp = _run_rsync($password,
    50          
128             [ 'rsync',
129             '--no-dirs',
130             (_path_is_modules_or_root($path)
131             ? '--quiet'
132             : ()),
133             $uri_str, # single arg means --list-only
134             ],
135             \$listing)) {
136 1         386 return $resp;
137             }
138              
139 0         0 my ($perms, $length, $mtime) = _parse_listing($listing);
140             ### $perms
141             ### $length
142             ### $mtime
143              
144             # No Content-Length for directory. The size in the listing is the
145             # directory size on disk and is not the length the GET listing will
146             # give. Could fetch the whole listing like GET to find the size, but
147             # the reason for HEAD is not to do a full fetch like that.
148 0   0     0 my $dir_listing ||= ($listing =~ /^(d|skipping directory)/);
149 0 0       0 if ($dir_listing) {
150 0         0 undef $length;
151 0         0 $content_type = 'text/plain';
152             }
153 0         0 my $resp = HTTP::Response->new(HTTP::Status::RC_OK(),
154             undef,
155             [ _content_headers($length, $mtime, $content_type) ]);
156 0 0       0 unless ($content_type) {
157 0         0 LWP::MediaTypes::guess_media_type($uri, $resp->headers);
158             }
159 0         0 return $resp;
160             }
161              
162             # If :content_file then rsync directly to or from it, otherwise a temp
163             # file.
164 1         3 my $temp_fh; # if a temp file is used
165             my $filename; # either $arg or $temp_fh->filename()
166 1 50 33     4 if ($arg && ! ref $arg) {
167             ### arg is content_file ...
168 0         0 $filename = $arg;
169             # $arg = \&_content_cb_noop;
170             } else {
171             ### arg not a file, make a temp file ...
172 1         751 require File::Temp;
173 1         10457 $temp_fh = File::Temp->new;
174 1         740 $filename = $temp_fh->filename;
175 1 50       12 binmode($temp_fh)
176             or return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR(),
177             "Cannot binmode on $filename: $!");
178             }
179             ### $filename
180              
181 1 50       5 if ($method eq 'GET') {
182             ### _path_is_modules_or_root: _path_is_modules_or_root($path)
183              
184             # No If-Modified-Since check on a directory or on the modules listing
185             # "rsync://hostname/module".
186             #
187 1 50 33     22 if (! $dir_listing
188             && defined (my $ims_str = $request->header('If-Modified-Since'))) {
189 0 0       0 if (defined (my $ims_time = HTTP::Date::str2time($ims_str))) {
190 0         0 my $listing;
191 0         0 _run_rsync($password,
192             [ 'rsync', $uri_str ], # same as HEAD above
193             \$listing);
194 0 0       0 if (defined $listing) { # if rsync ran successfully
195 0         0 my ($perms, $length, $mtime) = _parse_listing($listing);
196             ### mtime : $mtime
197             ### ims_time: $ims_time
198 0 0 0     0 if (! (defined $perms && $perms =~ /^d/) # no check of directory listing
      0        
      0        
199             && defined $mtime && $mtime <= $ims_time) {
200 0         0 my $resp = HTTP::Response->new (HTTP::Status::RC_NOT_MODIFIED(),
201             undef,
202             [ _content_headers($length, $mtime) ]);
203 0         0 LWP::MediaTypes::guess_media_type($uri, $resp->headers);
204 0         0 return $resp;
205             }
206             }
207             }
208             }
209              
210 1         111 my $mtime;
211              
212 1 50       3 if (! $dir_listing) {
213 1         1 my $stdout;
214 1 50       7 if (my $resp = _run_rsync($password,
215             [ 'rsync',
216             '--checksum', # no date/size quick check
217             '-t', # -t set destination $filename modtime
218             '--inplace', # write into $filename rather than renaming
219             $uri_str,
220             $filename ],
221             \$stdout)) {
222 1         153 return $resp;
223             }
224             # if the path was in fact a directory then re-run to get its listing
225 0         0 $dir_listing = ($stdout =~ /^skipping directory/mi);
226             }
227              
228             # For non-root directory listing must have trailing slash like
229             # "rsync://hostname/module/dirname/" otherwise the listing is just the
230             # directory itself like
231             # "drwxrwxrwt 69,632 2014/03/26 19:38:01 tmp"
232             #
233 0 0       0 if ($dir_listing) {
234 0 0       0 unless ($uri_str =~ m{/\z}) { $uri_str .= '/'; }
  0         0  
235 0 0       0 if (my $resp = _run_rsync($password,
236             [ 'rsync', $uri_str ],
237             $filename)) {
238 0         0 return $resp;
239             }
240 0         0 $content_type = 'text/plain';
241             } else {
242 0         0 $mtime = _stat_mtime($filename)
243             }
244              
245 0         0 my $resp = HTTP::Response->new(HTTP::Status::RC_OK(),
246             undef,
247             [ _content_headers(-s $filename, $mtime, $content_type) ]);
248 0 0       0 unless ($content_type) {
249 0         0 LWP::MediaTypes::guess_media_type($uri, $resp->headers);
250             }
251              
252             # If not read directly into :content_file $arg filename then collect
253             # from $temp_fh into $resp. collect() enforces max_size and has
254             # some callbacks.
255             #
256             # FIXME: Should we worry about those for the :content_file case? For
257             # the max_size we already have the full content, would there be any
258             # merit in truncating it?
259             #
260 0 0       0 if ($temp_fh) {
261 0         0 my $readerr;
262             $self->collect($arg, $resp, sub {
263 0     0   0 my $content = "";
264 0         0 my $bytes = sysread($temp_fh, $content, $size);
265             ### $bytes
266 0 0       0 if (! defined $bytes) {
267 0         0 $readerr = "$!";
268 0         0 return '';
269             }
270 0         0 return \$content;
271 0         0 });
272 0 0       0 if (defined $readerr) {
273 0         0 return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR(),
274             "Error reading $filename: $readerr");
275             }
276             }
277              
278 0         0 return $resp;
279             }
280              
281 0 0       0 if ($method eq 'PUT') {
282             ### PUT ...
283              
284             # ENHANCE-ME: Does "Content-Range" mean ->content() is only that part of
285             # the data?
286              
287 0 0       0 if ($temp_fh) {
288 0 0       0 if (defined (my $err = _http_message_content_to_fh($request, $temp_fh,
289             $filename))) {
290 0         0 return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR(),
291             "Error writing $filename: $err");
292             }
293             }
294             ### ls: system("ls -d $filename")
295              
296 0 0       0 if (my $resp = _run_rsync($password,
297             [ 'rsync',
298             '--checksum', # full check, not date/size quick
299             '--inplace', # write into file, not rename
300             $filename,
301             $uri_str ])) {
302 0         0 return $resp;
303             }
304              
305             # Per RFC 2616 and webdav RFC 2518
306             # "201 Created" is for newly created destination resource.
307             # ENHANCE-ME: Supposed to be "200 OK" for existing destination modified.
308             #
309 0         0 return HTTP::Response->new(HTTP::Status::RC_CREATED());
310             }
311              
312 0         0 return HTTP::Response->new(HTTP::Status::RC_NOT_IMPLEMENTED(),
313             "Unrecognised rsync method: $method");
314             }
315              
316             sub _run_rsync {
317 2     2   5 my ($password, $command_line, $stdout_ref) = @_;
318             ### _run_rsync() ...
319             # ### diagnostic -ivvvv: splice @$command_line, 1,0, '-ivv'
320              
321 2 50       6 if (! $stdout_ref) {
322 0         0 my $stdout;
323 0         0 $stdout_ref = \$stdout;
324             }
325 2         2 my $stderr;
326             my $eval;
327             {
328             # Always set $ENV{RSYNC_PASSWORD}, to an empty string if nothing else.
329             # Otherwise rsync will prompt for a password with getpass() or similar
330             # (which opens and read /dev/tty).
331             #
332             # Does --protect-args do anything when talking to the daemon? Turn it
333             # on since certainly don't want space splitting etc. Do this by
334             # $ENV{'RSYNC_PROTECT_ARGS'} since the option is only in rsync 3.1 up.
335             #
336             # $ENV{'TZ'} set to GMT so that the date/time in the listing output will
337             # be in GMT. If the local timezone has any daylight savings then when
338             # the clocks go back times are duplicated and so are ambiguous.
339             #
340 2 50       4 if (! defined $password) { $password = ''; }
  2         7  
  2         6  
341 2         279 local %ENV = (%ENV,
342             RSYNC_PROTECT_ARGS => 1,
343             RSYNC_PASSWORD => $password,
344             TZ => 'GMT+0');
345              
346             ### $command_line
347             ### RSYNC_PASSWORD: $ENV{'RSYNC_PASSWORD'}
348 2         9 $eval = eval {
349 2 50       39 IPC::Run::run ($command_line,
350             '<', File::Spec->devnull,
351             (defined $stdout_ref
352             ? ('>', $stdout_ref, '2>', \$stderr)
353             : ('>', \$stderr, '2>&1')));
354 1         10387 1 };
355             }
356 2 100       1831 if (! $eval) {
357 1         4 my $err = $@;
358 1         10 return HTTP::Response->new(HTTP::Status::RC_INTERNAL_SERVER_ERROR(),
359             "Cannot run rsync program",
360             [ 'Content-Type' => 'text/plain' ],
361             $err);
362             }
363              
364             # "401 Unauthorized" here applies to all rsync runs, GET, HEAD and PUT.
365 1         8 my $wstat = $?;
366             ### wstat: sprintf '0x%X', $wstat
367             ### $stdout_ref
368             ### $stderr
369 1 50       5 if ($wstat != 0) {
370 1 50 33     34 return HTTP::Response->new(($stderr =~ /\@ERROR: auth failed/
    50          
371             ? HTTP::Status::RC_UNAUTHORIZED()
372             : HTTP::Status::RC_NOT_FOUND()),
373             undef,
374             [ 'Content-Type' => 'text/plain' ],
375             join ('',
376             "rsync program ", _wstat_str($wstat), "\n",
377             (ref $stdout_ref && defined $$stdout_ref ? ($$stdout_ref, "\n") : ()),
378             $stderr, "\n"));
379             }
380              
381 0         0 return;
382             }
383              
384             # POSIX::WIFEXITED() and friends either croak (Perl 5.8 up) or don't exist
385             # at all (5.6.x and earlier) if not available from the system.
386             #
387             sub _wstat_str {
388 1     1   6 my ($wstat) = @_;
389 1         13 require POSIX;
390 1 50 33     6 if (eval { POSIX::WIFEXITED($wstat) }
  1         16  
391 1         11 && defined (my $exit_code = eval { POSIX::WEXITSTATUS($wstat) })) {
392 1         59 return "exit code $exit_code";
393             }
394 0 0 0     0 if (eval { POSIX::WIFSIGNALED($wstat) }
  0         0  
395 0         0 && defined (my $signal_number = eval { POSIX::WTERMSIG($wstat) })) {
396 0         0 return "signal $signal_number";
397             }
398 0         0 return sprintf "exit status 0x%X", $wstat;
399             }
400              
401             # $request is a HTTP::Message.
402             # Write its $request->content() bytes to file handle $fh.
403             # If successful return undef.
404             # If error then return a string describing the problem.
405             # $filename is used in the error message.
406             #
407             sub _http_message_content_to_fh {
408 0     0   0 my ($request, $fh, $filename) = @_;
409             ### _http_message_content_to_fh() ...
410              
411 0         0 my $content = $request->content;
412 0 0       0 if (! defined $content) {
413 0         0 return "no content in request";
414             }
415              
416 0 0       0 if (ref($content) eq 'SCALAR') {
    0          
    0          
417             ### scalar ref ...
418 0 0       0 if (print $fh $$content) {
419 0         0 return; # good
420             }
421             # write error
422              
423             } elsif (ref($content) eq 'CODE') {
424             ### coderef ...
425 0         0 for (;;) {
426 0         0 my $buf = &$content();
427 0 0       0 if (length($buf) == 0) {
428 0         0 return; # good
429             }
430 0 0       0 print $fh $buf
431             or last; # write error
432             }
433              
434             } elsif (! ref $content) {
435             ### plain scalar ...
436 0 0       0 if (print $fh $content) {
437 0         0 return; # good
438             }
439             # write error
440              
441             } else {
442 0         0 return "unrecognised request content()";
443             }
444              
445 0         0 return "Cannot write $filename: $!";
446             }
447              
448             # sub _content_cb_noop {
449             # }
450              
451             # Return the mtime modification time of a filename or file handle.
452             sub _stat_mtime {
453 0     0   0 my ($fh_or_filename) = @_;
454 0         0 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$filesize,
455             $atime,$mtime,$ctime,$blksize,$blocks) = stat $fh_or_filename;
456 0         0 return $mtime;
457             }
458              
459             # $listing is a file info string from rsync.
460             # Return ($perms, $length, $mtime), possibly with undefs if parts not
461             # recognised.
462             # $perms is a string
463             # $length is an integer number
464             # $mtime is a time_t number
465             #
466             # $listing is like
467             # -rw-r--r-- 1,260 2004/10/29 04:50:12 foo.txt
468             # or for a symlink
469             # lrwxrwxrwx 3 2014/03/20 17:21:21 bar -> foo
470             #
471             # rsync 3.1 introduces commas as digit grouping for the size. Or dots if
472             # the decimal point is not a dot in the locale. The "k" etc abbreviations
473             # are confined to --human-readable so don't occur. Code in rsync
474             # lib/compat.c. Any dots or commas are removed for the returned $length.
475             #
476             # Date/time in the listing is in the client-side system timezone.
477             # _run_rsync() above forces it to GMT and that's how it's treated here when
478             # converting to time_t $mtime.
479             #
480             # rsync code in generator.c list_file_entry().
481             #
482             sub _parse_listing {
483 3     3   1111 my ($listing) = @_;
484             ### $listing
485              
486 3         10 my ($perms, $length, $mtime);
487 3 50       29 if (($perms, $length, my $mtime_str)
488             = ($listing =~ m{\s*(\S+)\s+([0-9,.]+)\s+([0-9/]+ [0-9:]+)})) {
489 3         7 $length =~ tr/.,//d; # delete commas and dots
490 3         12 $mtime = HTTP::Date::str2time($mtime_str, 'GMT');
491             }
492 3         349 return ($perms, $length, $mtime);
493             }
494              
495             # return a list
496             sub _content_headers {
497 0     0   0 my ($length, $mtime, $content_type) = @_;
498 0 0       0 return ('Content-Length' => $length,
499             (defined $mtime ? ('Last-Modified' => HTTP::Date::time2str($mtime)) : ()),
500             'Content-Type' => $content_type,
501             # 'X-Rsync-Perms' => $perms, # any good?
502             );
503             }
504              
505             # Return true if $path has no module part, hence giving a modules listing,
506             # or has a module part but only the root directory.
507             sub _path_is_modules_or_root {
508 3     3   6 my ($path) = @_;
509 3         23 return scalar($path !~ m{^/[^/]+ # module part
510             /.*
511             [^./]* # some non-. or / means not root dir
512             }x);
513             }
514              
515             1;
516             __END__