File Coverage

blib/lib/Net/ChooseFName.pm
Criterion Covered Total %
statement 182 209 87.0
branch 103 148 69.5
condition 35 59 59.3
subroutine 25 30 83.3
pod 19 20 95.0
total 364 466 78.1


line stmt bran cond sub pod time code
1             package Net::ChooseFName;
2              
3 1     1   58285 use 5.005;
  1         4  
  1         45  
4 1     1   6 use strict;
  1         2  
  1         60  
5              
6 1     1   984 use URI::URL 'url';
  1         2002999  
  1         73  
7 1     1   11 use File::Path 'mkpath';
  1         2  
  1         54  
8 1     1   1393 use LWP::MediaTypes qw(guess_media_type media_suffix add_type);
  1         21603  
  1         196  
9 1     1   13 use vars qw($VERSION);
  1         1  
  1         5379  
10              
11             $VERSION = '0.01';
12              
13             =head1 NAME
14              
15             Net::ChooseFName - Perl extension for choosing a name of a local mirror
16             of a net (e.g., FTP or HTTP) resource.
17              
18             =head1 SYNOPSIS
19              
20             use Net::ChooseFName;
21             $namer = Net::ChooseFName->new(max_length => 64); # Copies to CD ok
22              
23             $name = $namer->find_name_by_response($LWP_response);
24             $name = $namer->find_name_by_response($LWP_response, $as_if_content_type);
25              
26             $name = $namer->find_name_by_url($url, $suggested_name,
27             $content_type, $content_encoding);
28             $name = $namer->find_name_by_url($url, $suggested_name, $content_type);
29             $name = $namer->find_name_by_url($url, $suggested_name);
30             $name = $namer->find_name_by_url($url);
31              
32             $namer_returns_undef = Net::ChooseFName->failer(); # Funny constructor
33              
34              
35             =head1 DESCRIPTION
36              
37             This module helps to pick up a local file name for a remote resource
38             (e.g., one downloaded from Internet). It turns out that this is a
39             tricky business; keep in mind that most servers are misconfigured,
40             most URLs are malformed, and most filesystems are limited
41             w.r.t. possible filenames. As a result most downloaders fail to work
42             in some situations since they choose names which are not supported on
43             particular filesystems, or not useful for C-related work.
44              
45             Because of the many possible twists and ramifications, the design of
46             this module is to be as much configurable as possible. One of ways of
47             configurations is a rich system of options which influence
48             different steps of the process. To cover cases when options are not
49             flexible enough, the process is broken into many steps; each step is
50             easily overridable by subclassing C.
51              
52             The defaults are chosen to be as safe as possible while not getting
53             very much into the ways. For example, since C<%> is a special
54             character on DOSish shells, to simplify working from command line on
55             such systems, we avoid this letter in generated file names.
56             Similarly, since MacOS has problems with filenames with 8-bit
57             characters, we avoid them too; since may Unix programs have problem
58             with spaces in file names, we massage them into underscores; the
59             length of the longest file path component is restricted to 255 chars.
60              
61             Note that in many situations it is advisable to make these
62             restrictions yet stronger. For example, for copying to CD one should
63             restrict names yet more (C 64>); for copying to MSDOS
64             file systems enable option C<'8+3' =E 1>.
65              
66             [In the description of methods the $self argument is omitted.]
67              
68             =head2 Principal methods
69              
70             =over
71              
72             =item new(OPT1 => $val1, ...)
73              
74             Constructor method. Creates an object with given options. Default
75             values for the unspecified options are (comments list in which methods
76             this option is used):
77              
78             protect => # protect_characters()
79             # $1 should contain the match
80             qr/([?*|\"<>\\:?#\x00-\x1F\x7F-\xFF\[\])/,
81             protect_pref => '@', # protect_characters(), protect_directory()
82             root => '.', # find_directory()
83             dir_mode => 0775, # directory_found()
84             mkpath => 1, # directory_found()
85             max_suff_len => 4, # split_suffix() 'jpeg'
86             keepsuff_same_mediatype => 1, # choose_suffix()
87             type_suff => # choose_suffix()
88             {'text/ftp-dir-listing' => '.dirl'}
89             keep_suff => { text/plain => 1,
90             application/octet-stream => 1 },
91             short_suffices => # eight_plus_three()
92             {jpeg => 'jpg', html => 'htm',
93             'tar.bz2' => 'tbz', 'tar.gz' => 'tgz'},
94             suggest_disposition => 1, # find_name_by_response()
95             suggested_only_basename => 1, # find_name_by_response(), raw_name()
96             fix_url_backslashes => 1, # protect_characters()
97             max_length => 255, # fix_dups(), fix_component()
98             cache_name => 1, # name_found()
99             queryless_types => # url_takes_query()
100             { map(($_ => 1), # http://filext.com/detaillist.php?extdetail=DJV 2005/01
101             qw(image/djvu image/x-djvu image/dejavu image/x-dejavu
102             image/djvw image/x.djvu image/vnd.djvu ))},
103             queryless_ext => { 'djvu' => 1, 'djv' => 1 }, # url_takes_query()
104              
105             The option C is special so that the user-specified value is
106             I to this hash, and not I it. Similarly, the value
107             of option C is used to populate the value for C
108             of this hash.
109              
110             Other, options have C as the default value. Their effects are
111             documented in the documentation of the methods they affect. With the
112             exception of C, these options are booleans.
113              
114             html_suff # new()
115             known_names # known_names() name_found(); hash ref or undef
116             only_known # known_names()
117             hierarchical # raw_name(), find_directory()
118             use_query # raw_name()
119             8+3 # fix_basename(), fix_component()
120             keep_space # fix_component()
121             keep_dots # fix_component()
122             tolower # fix_component()
123             dir_query # find_directory()
124             site_dir # find_directory()
125             ignore_existing_files # fix_dups
126              
127             keep_nosuff, type_suff_no_enc, type_suff_fallback,
128             type_suff_fallback_no_enc # choose_suffix()
129              
130             Summary of the most useful in applications options (with defaults if
131             applicable):
132              
133             html_suff # Suffix for HTML (dot will be prepended)
134             root => '.', # Where to put files?
135             mkpath => 1, # Create directories with chosen names?
136             max_length => 255, # Maximal length of a path component
137             ignore_existing_files # Should the filename be "new"?
138             cache_name => 1, # Return the same filename on the same URL,
139             # even if file jumped to existence?
140             hierarchical # Only the last component of URL path matters?
141             suggested_only_basename => 1, # Should suggested name be relative the path?
142             use_query # Do not ignore the query part of URL?
143             # Value is used as (literal) prefix of query
144             dir_query # Make the non-query part of URL a directory?
145             site_dir # Put the hostname part of URL into directory?
146             keepsuff_same_mediatype # Preserve the file extensions matching type?
147             8+3 # Is the filesystem DOSish?
148             keep_space # Map spaces in URL to spaces in filenames?
149             tolower # Translate filenames to lowercase?
150              
151             type_suff, type_suff_no_enc, type_suff_fallback, type_suff_fallback_no_enc,
152             keep_suff, keep_nosuff # Hashes indexed by lowercased types;
153             # Allow tuning choosing the suffix
154              
155             =cut
156              
157             my $pr = '([?*|\"<>\\\\:?#\x00-\x1F\x7F-\xFF\\[\\]])';
158             my $defaults = {
159             protect => eval("qr/$pr/") || $pr,
160             protect_pref => '@',
161             root => '.',
162             dir_mode => 0775,
163             mkpath => 1,
164             keep_suff => {map(($_ => 1),
165             qw( text/plain
166             application/octet-stream application/download ))},
167             max_suff_len => 4, # 'jpeg'
168             keepsuff_same_mediatype => 1,
169             short_suffices => {qw( jpeg jpg tar.bz2 tbz html htm
170             tar.gz tgz )}, # used if '8+3' is true
171             suggest_disposition => 1,
172             suggested_only_basename => 1,
173             fix_url_backslashes => 1,
174             max_length => 255,
175             cache_name => 1, # name_found()
176             queryless_types => # url_takes_query()
177             { map(($_ => 1), # http://filext.com/detaillist.php?extdetail=DJV 2005/01
178             qw(image/djvu image/x-djvu image/dejavu image/x-dejavu
179             image/vnd.djvw image/djvw image/x.djvu))},
180             queryless_ext => { 'djvu' => 1, 'djv' => 1 }, # url_takes_query()
181             };
182              
183             my %default_suff = (
184             'text/ftp-dir-listing' => '.dirl',
185             );
186              
187             sub new {
188 21     21 1 18904 my $class = shift;
189 21         386 my $self = bless {%$defaults, @_}, $class;
190 21   50     164 $self->{type_suff} ||= {};
191 21 50       164 $self->{type_suff}{'text/html'} = ".$self->{html_suff}"
192             if defined $self->{html_suff};
193 21         50 $self->{type_suff} = {%default_suff, %{$self->{type_suff}}};
  21         82  
194 21         75 $self;
195             }
196              
197             =item find_name_by_url($url, $suggested_name, $type, $enc)
198              
199             This method returns a suitable filename for the resource given its URL.
200             Optional arguments are a suggested name (possibly, it will be modified
201             according to options of the object), the content-type, and the
202             content-encoding of the resource. If multiple content-encodings are
203             required, specify them as an array reference.
204              
205             A chain of helper methods (L<"Transformation chain">) is called to
206             apply certain transformations to the name. C is returned if
207             any of the helper methods (except known_names() and protect_query())
208             return undefined values; the caller is free to interpret this as "load
209             to memory", if appropriate. These helper methods are listed in the
210             following section.
211              
212             =cut
213              
214             sub find_name_by_url {
215 39     39 1 11087 my ($self, $url, $suggested, $type, $enc) = @_;
216              
217 39 50       109 defined($url = $self->url_2resource($url, $type, $enc)) or return;
218              
219 39         116 my $f = $self->known_names($url, $suggested, $type, $enc);
220 39 100       132 return $f if defined $f;
221              
222 31 50       92 ($f, my $q) = $self->raw_name($url, $suggested, $type, $enc)
223             or return;
224              
225 31         1121 $f = $self->protect_characters($f, $q, $url, $suggested, $type, $enc);
226 31 50       88 return unless defined $f;
227              
228 31         82 $q = $self->protect_query($f, $q, $url, $suggested, $type, $enc);
229              
230 31 50       98 (my $dirname, $f, $q)
231             = $self->find_directory($f, $q, $url, $suggested, $type, $enc)
232             or return;
233              
234 31         111 $dirname = $self->protect_directory($dirname, $f, $q, $url, $suggested, $type, $enc);
235 31 50       110 return unless defined $dirname;
236              
237 31         77 $dirname = $self->directory_found($dirname, $f, $q, $url, $suggested, $type, $enc);
238 31 50       81 return unless defined $dirname;
239              
240 31 50       226 ($f, my $suff) =
241             $self->split_suffix($f, $dirname, $q, $url, $suggested, $type, $enc)
242             or return;
243 31 50       115 ($f, $suff) = $self->choose_suffix($f, $suff, $dirname, $q, $url,
244             $suggested, $type, $enc)
245             or return;
246 31 50       107 ($f, $suff) =
247             $self->fix_basename($f, $dirname, $suff, $url, $suggested, $type, $enc)
248             or return;
249 31         98 $f = $self->fix_dups($f, $dirname, $suff, $url, $suggested, $type, $enc);
250 31 50       74 return unless defined $f;
251 31         105 return $self->name_found($url, $f, $dirname, $suff, $suggested, $type, $enc);
252             }
253              
254             =item find_name_by_response($response [, $content_type])
255              
256             This method returns name given an LWP response object (and,
257             optionally, an overriding C). If option
258             C is TRUE, uses the header C
259             from the response as the suggested name, then passes the fields from
260             the response object to the method find_name_by_url().
261              
262             =cut
263              
264             sub find_name_by_response {
265 0     0 1 0 my ($self, $res, $ct) = (shift, shift, shift);
266 0 0       0 $ct = $res->content_type unless defined $ct;
267             # "Content-Disposition" header is defined by RFC1806; supported by Netscape
268 0   0     0 my $cd = $self->{suggest_disposition} && $res->header("Content-Disposition");
269 0         0 my $suggested;
270 0 0 0     0 if ($cd && $cd =~ /\bfilename\s*=\s*(\S+)/) {
271 0         0 $suggested = $1;
272 0         0 $suggested =~ s/;$//;
273 0         0 $suggested =~ s/^([\"\'])(.*)\1$/$2/;
274 0 0       0 $suggested =~ s,.*[\\/],, if $self->{suggested_only_basename};
275             }
276 0         0 $self->find_name_by_url($res->request->url, $suggested,
277             $ct, $res->content_encoding);
278             }
279              
280             =back
281              
282             =head2 Transformation chain
283              
284             =over
285              
286             =item url_2resource($url [, $type, $encoding])
287              
288             This method returns $url modified by removing the parts related to
289             access to I of the resource. In particular, the I part is
290             removed, as well as the I part if url_is_queryless() returns TRUE.
291              
292             =cut
293              
294             sub url_2resource {
295 39     39 1 64 my ($self, $url, $type, $enc) = @_;
296              
297 39 50       194 $url = url($url) unless ref($url);
298 39         19240 my $cpy;
299 39 100       368 if (defined $url->frag) {
300 10         344 $cpy = $url = $url->clone;
301 10         159 $url->frag(undef);
302             }
303 39 50 33     1052 if (defined $url->equery and $self->url_takes_query($url, $type, $enc)) {
304 0 0       0 $url = $url->clone unless $cpy;
305 0         0 $url->query(undef);
306             }
307             $url
308 39         155 }
309              
310             =item known_names($url, $suggested, $type, $enc)
311              
312             The method find_name_by_url() will return the return value of this
313             method (unless L) immediately. Unless overriden, this method
314             returns the value of the hash option C indexed by the
315             $url. (By default this hash is empty.)
316              
317             If the option C is true, it is a fatal error if $url is
318             not a key of this hash.
319              
320             =cut
321              
322             sub known_names {
323 39     39 1 54 my ($self, $url) = @_;
324 39         221 my $f = $self->{known_names}{$url};
325 39 100       508 return $f if defined $f;
326              
327 0         0 die "URL with unknown name `$url'"
328 31 50 33     95 if $self->{only_known} and keys %{$self->{known_names}};
329 31         53 return;
330             }
331              
332             =item raw_name($url, $suggested, $type, $enc)
333              
334             Returns the 0th approximation to the filename of the resource; the
335             return value has two parts: the principal part, and the query string
336             (C if should not be used).
337              
338             If $suggested is undefined, returns the path part of the $url, and the
339             query part, if present and if option C is TRUE). Otherwise
340             either returns $suggested, or (if options C
341             and C are both true), returns the I part of the
342             $url with the last component changed to $suggested; the query part is
343             ignored in this case. In the latter case, if option C is TRUE, only the last path component of $suggested is used.
344              
345             =cut
346              
347             sub raw_name {
348 31     31 1 46 my ($self, $url, $suggested) = @_;
349 31 100       69 if (defined $suggested) {
350 4 100 100     29 if ($self->{suggested_only_basename} and $self->{hierarchical}) {
351 1         7 my @p = $url->path_segments;
352 1 50       47 $suggested =~ s,.*/,, if $self->{suggested_basename};
353 1         9 return join '/', @p[0..$#p-1], $suggested;
354             }
355 3         14 return $suggested;
356             } else {
357 27 100       82 my $q = $self->{use_query} ? $url->equery : undef;
358 27         518 return ($url->path, $q);
359             }
360             }
361              
362             =item protect_characters($f, $query, $url, $suggested, $type, $enc)
363              
364             Returns the filename $f with necessary character-by-character
365             translations performed. Unless overriden, it translates backslashes
366             to slashes if the option C is TRUE, replaces
367             characters matched by regular expression in the option C by
368             their hexadecimal representation (with the leader being the value of
369             the option C), and replaces percent signs by the value
370             of the option C.
371              
372             =cut
373              
374             sub protect_characters {
375 31     31 1 47 my ($self, $f) = @_;
376 31 50       137 $f =~ s,\\,/,g if $self->{fix_url_backslashes};
377             # Protect against funny characters, some filesystems can bark on them
378 31         1091 $f =~ s($self->{protect})
379 0         0 ( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge;
380 31 50       110 $f =~ s(%)($self->{protect_pref})g if $self->{protect_pref} ne '%';
381 31         67 $f
382             }
383              
384             =item protect_query($f, $query, $url, $suggested, $type, $enc)
385              
386             Returns $query with necessary character-by-character translations
387             performed. Unless overriden, it translates slashes, backslashes, and
388             characters matched byregular expression in the option C by
389             their hexadecimal representation (with the leader being the value of
390             the option C), and replaces percent signs by the value
391             of the option C.
392              
393             =cut
394              
395             sub protect_query {
396 31     31 1 76 my ($self, $f, $q) = @_;
397 31 100       92 return unless defined $q;
398             # Protect against funny characters, some filesystems can bark on them
399 7         45 $q =~ s($self->{protect})
400 49         232 ( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge;
401 7 50       388 $q =~ s(%)($self->{protect_pref})g if $self->{protect_pref} ne '%';
402 7         31 $q =~ s(([/\\]))
403 0         0 ( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge;
404 7         22 $q
405             }
406              
407             =item find_directory($f, $query, $url, $suggested, $type, $enc)
408              
409             Returns a triple of the appropriate directory name, the relative
410             filename, and a string to append to the filename, based on
411             processed-so-far filename $f and the $query string.
412              
413             Unless overriden, does the following: unless the option
414             C is TRUE, all but the last path components of $f are
415             ignored. If the option C is TRUE, the host part of the URL
416             (as well as the port part - if non-standard) are prepended to the
417             filename. The leading backslash is always stripped, and the option
418             C is used as the lead components of the directory name. If
419             $query is defined, and the option C is true, $f is used as
420             the last component of the directory, and $query as file name (with
421             option C prepended).
422              
423             (Dirname is assumed to be C-terminated.)
424              
425             =cut
426              
427             sub find_directory {
428 31     31 1 58 my ($self, $f, $q, $url) = @_;
429             # trim path until only the basename is left
430 31         138 $f =~ s|(.*/)||;
431 31 100 66     201 my $dirname = ($self->{hierarchical} and $1) ? $1 : '';
432 31         102 $dirname =~ s#^/##;
433              
434 31 100       70 if (defined $q) {
435 7         21 $q = "$self->{use_query}$q";
436 7 100       22 if ($self->{dir_query}) {
437 6         14 $dirname = "$dirname$f/"; # XXXX If it already exists as a file?
438 6         10 $f = $q;
439 6         12 $q = '';
440             }
441             } else {
442 24         38 $q = '';
443             }
444              
445 31 100       72 if ($self->{site_dir}) {
446 9         16 eval {
447 9         53 my $site = lc $url->host;
448 9         290 my $port = $url->port;
449 9         424 my $def = $url->default_port;
450 9 50       94 $port = '' if $port == $def;
451 9 50       20 $site .= "=port$port" if length $port;
452 9         35 $dirname = "$self->{root}/$site/$dirname";
453             };
454             } else {
455 22         62 $dirname = "$self->{root}/$dirname";
456             }
457 31         157 ($dirname, $f, $q)
458             }
459              
460             =item protect_directory($dirname, $f, $append, $url, $suggested, $type, $enc)
461              
462             Returns the provisional directory part of the filename. Unless
463             overriden, replaces empty components by the string C preceeded
464             by the value of C option; then applies the method
465             fix_component() to each component of the directory.
466              
467             =cut
468              
469             sub protect_directory {
470 31     31 1 50 my ($self, $dirname) = @_;
471 31         116 $dirname =~ s,/(?=/),/$self->{protect_pref}empty,g; # empty components
472 31         163 return join '/', map($self->fix_component($_,1), split m|/|, $dirname), '';
473             }
474              
475             =item directory_found($dirname, $f, $append, $url, $suggested, $type, $enc)
476              
477             A callback to process the calculated directory name. Unless
478             overriden, it creates the directory (with permissions per option
479             C) if the option C is TRUE.
480              
481             Actually, the directory name is the return value, so this is the last
482             chance to change the directory name...
483              
484             =cut
485              
486             sub directory_found {
487 31     31 1 101 my ($self, $dirname) = @_;
488 31 100 66     6371 mkpath $dirname, $self->{verbose}, $self->{dir_mode}
      100        
489             if $self->{mkpath} and length $dirname and not -d $dirname;
490 31         82 $dirname;
491             }
492              
493             # Copied from LWP::Mediatypes v1.32
494             my %suffixEncoding = (
495             'Z' => 'compress',
496             'gz' => 'gzip',
497             'hqx' => 'x-hqx',
498             'uu' => 'x-uuencode',
499             'z' => 'x-pack',
500             'bz2' => 'x-bzip2',
501             );
502             my %suffixDecoding = reverse %suffixEncoding;
503              
504             =item split_suffix($f, $dirname, $append, $url, $suggested, $type, $enc)
505              
506             Breaks the last component $f of the filename into a pair of basename
507             and suffix, which are returned. $dirname consists of other components
508             of the filename, $append is the string to append to the basename in
509             the future.
510              
511             Suffix may be empty, and is supposed to contain the leading dot (if
512             applicable); it may contain more than one dot. Unless overriden, the
513             suffix consists of all trailing non-empty started-by-dot groups with
514             length no more than given by the option C (not including
515             the leading dot).
516              
517             =cut
518              
519             sub split_suffix {
520 49     49 1 107 my ($self, $f, $dirname, $append, $url, $suggested, $type, $enc) = @_;
521              
522 49         50 my $suff;
523              
524 49         76 my $max = $self->{max_suff_len};
525 49         1385 (my $base = $f) =~ s<((?:\.[^/]{1,$max})*)$><>;
526 49         288 return ($base, "$1");
527             }
528              
529              
530             =item choose_suffix($f, $suff, $dirname, $append, $url, $suggested, $type, $enc)
531              
532             Returns a pair of basename and appropriate suffix for a file. $f is
533             the basename of the file, $suff is its suffix, $dirname consists of
534             other components of file names, $append is the string to append to the
535             basename.
536              
537             Different strategies applicable to this problem are:
538              
539             =over
540              
541             =item *
542              
543             keep the file extension;
544              
545             =item *
546              
547             replace by the "best" extension for this $type (and $enc);
548              
549             =item *
550              
551             replace by the user-specified type-specific extension.
552              
553             =back
554              
555             Any of these has two variants: whether we want the encodings reflected
556             in the suffix, or not. Unless overriden, chosing strategy/variant
557             consists of several rounds.
558              
559             In the first round, choose user-specified suffix if $type is defined,
560             and is (lowercased) in the option-hashes C and
561             C (choosing the variant based on which hash
562             matched). Keep the current suffix if $type is not defined, or option
563             C is TRUE and the current suffix of the file
564             matches $type and $enc (per database of known types and encodings).
565              
566             The second round runs if none of these was applicable. Choose
567             user-specified suffix if $type is (lowercased) in the hashes
568             C or C (choosing
569             variant as above); keep the current suffix if the type (lowercased) is
570             in the hashes C or C (depending on whether
571             $suff is empty or not).
572              
573             If none of these was applicable, the last round chooses the
574             appropriate suffix by the database of known types and encodings; if
575             not found, the existing suffix is preserved.
576              
577             =cut
578              
579             sub choose_suffix {
580 31     31 1 72 my ($self, $f, $suff, $dirname, $append, $url, $suggested, $type, $enc) = @_;
581              
582 31         47 my ($guess_suffix, $check_enc);
583 31 50       94 $enc = [] unless defined $enc;
584 31 50       95 $enc = [$enc] unless ref $enc;
585 31 100       160 if (not defined $type) { # Do nothing
    50          
    50          
    50          
586             } elsif (exists $self->{type_suff}{lc $type}) {
587 0         0 $suff = $self->{type_suff}{lc $type};
588 0         0 $check_enc = $enc;
589             } elsif (exists $self->{type_suff_no_enc}{lc $type}) {
590 0         0 $suff = $self->{type_suff}{lc $type};
591             } elsif ($self->{keepsuff_same_mediatype}) {
592 18         74 my($t, @enc) = guess_media_type($f);
593 18 50 33     1528 $guess_suffix = 1
      33        
594             unless defined $t and lc $t eq lc $type and lc "@enc" eq lc "@$enc";
595             } else {
596 0         0 $guess_suffix = 1;
597             }
598              
599 31 100       171 if (not $guess_suffix) { # No substitution
    100          
    50          
    50          
    50          
600             } elsif (exists $self->{type_suff_fallback}{lc $type}) {
601 0         0 $suff = $self->{type_suff_fallback}{lc $type};
602 0         0 $check_enc = $enc;
603             } elsif (exists $self->{type_suff_fallback_no_enc}{lc $type}) {
604 0         0 $suff = $self->{type_suff_fallback}{lc $type};
605             } elsif ((length $suff)
606             ? $self->{keep_suff}{lc $type}
607             : $self->{keep_nosuff}{lc $type}) { # No substitution
608             } else {
609 18         57 my $s = media_suffix($type);
610 18 50 33     227 if (defined $s and length $s) { # Known media type...
611 18         27 $suff = ".$s";
612 18         29 $check_enc = $enc;
613             }
614             }
615              
616 31 100       68 if ($check_enc) {
617 18         38 for my $e (@$enc) {
618 0 0       0 $suff .= $suffixDecoding{$e} if exists $suffixDecoding{$e};
619             }
620             }
621              
622 31         171 return ("$f$append", $suff);
623             }
624              
625             =item fix_basename($f, $dirname, $suff, $url, $suggested, $type, $enc)
626              
627             Returns a pair of basename and suffix for a file. $f is the last
628             component of the name of the file, $dirname consists of other
629             components. Unless overriden, this method replaces an empty basename
630             by C<"index"> and applies fix_component() method to the basename;
631             finally, if C<'8+3'> otion is set, it converts the filename and suffix
632             to a name suitable 8+3 filesystems.
633              
634             =cut
635              
636             sub fix_basename {
637 31     31 1 66 my ($self, $f, $dirname, $suffix) = @_;
638              
639 31 50       99 $f = "index" unless length $f;
640 31         67 $f = $self->fix_component($f,0); # Length ignores extension...
641 31 100       151 ($f, $suffix) = $self->eight_plus_three($f, $suffix) if $self->{'8+3'};
642 31         130 return ($f, $suffix);
643             }
644              
645             =item fix_dups($f, $dirname, $suff, $url, $suggested, $type, $enc)
646              
647             Given a basename, extension, and the directory part of the filename,
648             modifies the basename (if needed) to avoid duplicates; should return
649             the complete file name (combining the dirname, basename, and suffix).
650             Unless overriden, appends a number to the basename (shortening
651             basename if needed) so that the result is unique.
652              
653             This is a prime candidate for overriding (e.g., to ask user for
654             confirmation of overwrite).
655              
656             =cut
657              
658             sub fix_dups {
659 31     31 1 65 my ($self, $f, $dirname, $suff) = @_;
660              
661 31 100       75 return "$dirname$f$suff" if $self->{ignore_existing_files};
662 30         48 my $max_length = $self->{max_length};
663 30         48 my $extra = ""; # something to make the name unique
664 30 100       71 $max_length = 8 + length $suff if $self->{'8+3'};
665 30         187 while (1) {
666             # Construct a new file name; give up shortening if suffix is too long...
667 32 100 66     194 if ( $max_length and length "$f$extra$suff" > $max_length
      66        
668             and length "$extra$suff" < $max_length ) {
669 4         14 $f = substr $f, 0, $max_length - length "$extra$suff";
670             }
671 32         64 my $file = $dirname . $f . $extra . $suff;
672             # Check if it is unique
673 32 100       887 return $file unless -e $file;
674              
675 2 50       13 $extra = "000" unless $extra; # Try appending a number
676 2         5 $extra++;
677             }
678             }
679              
680             =item name_found($url, $f, $dirname, $suff, $suggested, $type, $enc)
681              
682             The callback method to register the found name. Unless overridden,
683             behaves like following: if option C is TRUE, stores the
684             found name in the C hash. Otherwise just returns the found name.
685              
686             =cut
687              
688             sub name_found {
689 31     31 1 75 my ($self, $url, $f, $dirname, $suff, $suggested, $type, $enc) = @_;
690              
691 31 100       108 return $f unless $self->{cache_name};
692 28         175 return $self->{known_names}{$url} = $f;
693             }
694              
695             =back
696              
697             =head2 Helper methods
698              
699             =over
700              
701             =item fix_component($component, $isdir)
702              
703             Returns a suitably modified value of a path component of a filename.
704             The non-overriden method massages unescapes embedded SPACE characters;
705             it removes starting/trailing, and converts the rest to C<_> unless the
706             option C is TRUE; removes trailing dots unless the option
707             C is TRUE; translates to lowercase if the option C
708             is TRUE, truncates to C if this option is set, and applies
709             the eight_plus_three() method if the option C<'8+3'> is set.
710              
711             =cut
712              
713             sub fix_component {
714 160     160 1 388 my ($self, $f, $isdir) = @_;
715              
716 160         259 $f =~ s/%20/ /g; # URL-encoded space is %20
717 160         423 $f =~ s/\E$self->{protect_pref}20/ /g; # URL-encoded space is %20
718 160 100       367 unless ($self->{keep_space}) { # Translate spaces in URL to underscores (_)
719 151         434 $f =~ s/^ *//; # Remove initial spaces from base
720 151         1066 $f =~ s/ *$//; # Remove trailing spaces from base
721              
722 151         259 $f =~ tr/ /_/;
723             }
724 160 50       456 $f =~ s/\.+$// unless $self->{keep_dots};
725 160 100       498 $f = lc $f if $self->{tolower}; # Output lower-case
726              
727 160 100 66     745 substr($f, $self->{max_length}) = ''
728             if $self->{max_length} and length $f > $self->{max_length};
729 160 100       345 return join '', $self->eight_plus_three($f) if $self->{'8+3'};
730 142         605 $f;
731             }
732              
733             =item eight_plus_three($fname, $suffix)
734              
735             Returns the value of filename modified for filesystems with 8+3
736             restriction on the filename (such as DOS). If $suffix is not given,
737             calculates it from $fname; otherwise $suffix should include the
738             leading dot, and $fname should have $suffix already removed. (Some
739             parts of info may be moved between suffix and filename if judged
740             appropriate.)
741              
742             =cut
743              
744             sub eight_plus_three {
745 20     20 1 29 my ($self, $f, $suff) = @_;
746              
747 20 100       56 ($f, $suff) = $self->split_suffix($f, undef, '') unless defined $suff;
748             # Try to move some info to a suffix even if it becomes too long
749 20 100 100     110 $suff = $2 if not length $suff and $f =~ s|(.{8,})\.(.*)$|$1|s ;
750              
751             # Balance multiple suffices between the parts
752 20   100     300 $f .= $1 while length($f) <= 6 and $suff =~ s/^(\..*?)(?=\...)//s;
753              
754 20 100 100     124 if (not length $suff and length($f) > 8) { # Move part of fname to suff
755 6         10 my $l = length($f) - 8;
756 6 50       12 $l = 3 if $l > 3;
757 6         12 $suff = substr $f, -$l, $l;
758 6         11 substr($f, -$l, $l) = '';
759             }
760 20         40 $f =~ s/\./_/g;
761 20         69 $suff =~ s/^\.//; # Temporary strip the leading dot
762 20         35 my $s = $self->{short_suffices}{$suff};
763 20 100       57 ($s = $suff) =~ s/\./_/g unless defined $s;
764              
765 20 100       45 substr($f, 8) = '' if length($f) > 8;
766 20 100       40 substr($s, 2, length($s)-3) = '' if length($s) > 3;
767 20 100       47 $s = ".$s" if length $s;
768 20         106 ($f, $s);
769             }
770              
771             =item url_takes_query($url [, $type, $encoding])
772              
773             This method returns TRUE if the I part of the URL is selecting
774             a part of the resource (i.e., if it is behaves as a I part,
775             and it is the client which should process this part). Such URLs are
776             detected by $type (should be in hash option C), or by
777             extension of the last path component (should be in hash option
778             C).
779              
780             =back
781              
782             =cut
783              
784             sub url_takes_query {
785 39     39 1 1073 my ($self, $url, $type) = @_;
786 39 50 66     175 return 1 if $type and $self->{queryless_types}{$type};
787 39         196 my @p = $url->path_segments;
788 39   66     3394 my ($ext) = (@p and $p[-1] =~ /.*\.(.*)$/);
789 39 100       314 $ext and $self->{queryless_ext}{$ext};
790             }
791              
792             =head1 Net::ChooseFName::Failer class
793              
794             A class which behaves as Net::ChooseFName, but always returns
795             C. For convenience, the constructor is duplicated as a class
796             method failer() in the class Net::ChooseFName.
797              
798             =cut
799              
800             # These always return undef; the caller is free to interpret this "to memory"
801 0     0   0 sub Net::ChooseFName::Failer::find_name_by_response {}
802 0     0   0 sub Net::ChooseFName::Failer::find_name_by_url {}
803 0     0   0 sub Net::ChooseFName::Failer::new {bless [], shift}
804 0     0 0 0 sub Net::ChooseFName::failer {bless [], 'Net::ChooseFName::Failer'}
805              
806             sub __Fix_broken_MediaTypes {
807 1     1   5 my @s = media_suffix('application/postscript');
808             # warn "Fixing `@s'...";
809             # if ($s[0] eq 'ai' or 1) { # [0] addresses in hash order; meaningless
810             # warn "Fixing...";
811 1         2227 @s = ('ps', grep $_ ne 'ps', @s);
812 1         8 add_type('application/postscript', @s);
813             # }
814             }
815             __Fix_broken_MediaTypes();
816              
817             1;
818             __END__