| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Net::ChooseFName; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
58285
|
use 5.005; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
45
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
60
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
984
|
use URI::URL 'url'; |
|
|
1
|
|
|
|
|
2002999
|
|
|
|
1
|
|
|
|
|
73
|
|
|
7
|
1
|
|
|
1
|
|
11
|
use File::Path 'mkpath'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
54
|
|
|
8
|
1
|
|
|
1
|
|
1393
|
use LWP::MediaTypes qw(guess_media_type media_suffix add_type); |
|
|
1
|
|
|
|
|
21603
|
|
|
|
1
|
|
|
|
|
196
|
|
|
9
|
1
|
|
|
1
|
|
13
|
use vars qw($VERSION); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
5379
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$VERSION = '0.01'; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Net::ChooseFName - Perl extension for choosing a name of a local mirror |
|
16
|
|
|
|
|
|
|
of a net (e.g., FTP or HTTP) resource. |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Net::ChooseFName; |
|
21
|
|
|
|
|
|
|
$namer = Net::ChooseFName->new(max_length => 64); # Copies to CD ok |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$name = $namer->find_name_by_response($LWP_response); |
|
24
|
|
|
|
|
|
|
$name = $namer->find_name_by_response($LWP_response, $as_if_content_type); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url, $suggested_name, |
|
27
|
|
|
|
|
|
|
$content_type, $content_encoding); |
|
28
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url, $suggested_name, $content_type); |
|
29
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url, $suggested_name); |
|
30
|
|
|
|
|
|
|
$name = $namer->find_name_by_url($url); |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$namer_returns_undef = Net::ChooseFName->failer(); # Funny constructor |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
This module helps to pick up a local file name for a remote resource |
|
38
|
|
|
|
|
|
|
(e.g., one downloaded from Internet). It turns out that this is a |
|
39
|
|
|
|
|
|
|
tricky business; keep in mind that most servers are misconfigured, |
|
40
|
|
|
|
|
|
|
most URLs are malformed, and most filesystems are limited |
|
41
|
|
|
|
|
|
|
w.r.t. possible filenames. As a result most downloaders fail to work |
|
42
|
|
|
|
|
|
|
in some situations since they choose names which are not supported on |
|
43
|
|
|
|
|
|
|
particular filesystems, or not useful for C-related work. |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
Because of the many possible twists and ramifications, the design of |
|
46
|
|
|
|
|
|
|
this module is to be as much configurable as possible. One of ways of |
|
47
|
|
|
|
|
|
|
configurations is a rich system of options which influence |
|
48
|
|
|
|
|
|
|
different steps of the process. To cover cases when options are not |
|
49
|
|
|
|
|
|
|
flexible enough, the process is broken into many steps; each step is |
|
50
|
|
|
|
|
|
|
easily overridable by subclassing C. |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
The defaults are chosen to be as safe as possible while not getting |
|
53
|
|
|
|
|
|
|
very much into the ways. For example, since C<%> is a special |
|
54
|
|
|
|
|
|
|
character on DOSish shells, to simplify working from command line on |
|
55
|
|
|
|
|
|
|
such systems, we avoid this letter in generated file names. |
|
56
|
|
|
|
|
|
|
Similarly, since MacOS has problems with filenames with 8-bit |
|
57
|
|
|
|
|
|
|
characters, we avoid them too; since may Unix programs have problem |
|
58
|
|
|
|
|
|
|
with spaces in file names, we massage them into underscores; the |
|
59
|
|
|
|
|
|
|
length of the longest file path component is restricted to 255 chars. |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Note that in many situations it is advisable to make these |
|
62
|
|
|
|
|
|
|
restrictions yet stronger. For example, for copying to CD one should |
|
63
|
|
|
|
|
|
|
restrict names yet more (C 64>); for copying to MSDOS |
|
64
|
|
|
|
|
|
|
file systems enable option C<'8+3' =E 1>. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
[In the description of methods the $self argument is omitted.] |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 Principal methods |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=over |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=item new(OPT1 => $val1, ...) |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Constructor method. Creates an object with given options. Default |
|
75
|
|
|
|
|
|
|
values for the unspecified options are (comments list in which methods |
|
76
|
|
|
|
|
|
|
this option is used): |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
protect => # protect_characters() |
|
79
|
|
|
|
|
|
|
# $1 should contain the match |
|
80
|
|
|
|
|
|
|
qr/([?*|\"<>\\:?#\x00-\x1F\x7F-\xFF\[\])/, |
|
81
|
|
|
|
|
|
|
protect_pref => '@', # protect_characters(), protect_directory() |
|
82
|
|
|
|
|
|
|
root => '.', # find_directory() |
|
83
|
|
|
|
|
|
|
dir_mode => 0775, # directory_found() |
|
84
|
|
|
|
|
|
|
mkpath => 1, # directory_found() |
|
85
|
|
|
|
|
|
|
max_suff_len => 4, # split_suffix() 'jpeg' |
|
86
|
|
|
|
|
|
|
keepsuff_same_mediatype => 1, # choose_suffix() |
|
87
|
|
|
|
|
|
|
type_suff => # choose_suffix() |
|
88
|
|
|
|
|
|
|
{'text/ftp-dir-listing' => '.dirl'} |
|
89
|
|
|
|
|
|
|
keep_suff => { text/plain => 1, |
|
90
|
|
|
|
|
|
|
application/octet-stream => 1 }, |
|
91
|
|
|
|
|
|
|
short_suffices => # eight_plus_three() |
|
92
|
|
|
|
|
|
|
{jpeg => 'jpg', html => 'htm', |
|
93
|
|
|
|
|
|
|
'tar.bz2' => 'tbz', 'tar.gz' => 'tgz'}, |
|
94
|
|
|
|
|
|
|
suggest_disposition => 1, # find_name_by_response() |
|
95
|
|
|
|
|
|
|
suggested_only_basename => 1, # find_name_by_response(), raw_name() |
|
96
|
|
|
|
|
|
|
fix_url_backslashes => 1, # protect_characters() |
|
97
|
|
|
|
|
|
|
max_length => 255, # fix_dups(), fix_component() |
|
98
|
|
|
|
|
|
|
cache_name => 1, # name_found() |
|
99
|
|
|
|
|
|
|
queryless_types => # url_takes_query() |
|
100
|
|
|
|
|
|
|
{ map(($_ => 1), # http://filext.com/detaillist.php?extdetail=DJV 2005/01 |
|
101
|
|
|
|
|
|
|
qw(image/djvu image/x-djvu image/dejavu image/x-dejavu |
|
102
|
|
|
|
|
|
|
image/djvw image/x.djvu image/vnd.djvu ))}, |
|
103
|
|
|
|
|
|
|
queryless_ext => { 'djvu' => 1, 'djv' => 1 }, # url_takes_query() |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
The option C is special so that the user-specified value is |
|
106
|
|
|
|
|
|
|
I to this hash, and not I it. Similarly, the value |
|
107
|
|
|
|
|
|
|
of option C is used to populate the value for C |
|
108
|
|
|
|
|
|
|
of this hash. |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Other, options have C as the default value. Their effects are |
|
111
|
|
|
|
|
|
|
documented in the documentation of the methods they affect. With the |
|
112
|
|
|
|
|
|
|
exception of C, these options are booleans. |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
html_suff # new() |
|
115
|
|
|
|
|
|
|
known_names # known_names() name_found(); hash ref or undef |
|
116
|
|
|
|
|
|
|
only_known # known_names() |
|
117
|
|
|
|
|
|
|
hierarchical # raw_name(), find_directory() |
|
118
|
|
|
|
|
|
|
use_query # raw_name() |
|
119
|
|
|
|
|
|
|
8+3 # fix_basename(), fix_component() |
|
120
|
|
|
|
|
|
|
keep_space # fix_component() |
|
121
|
|
|
|
|
|
|
keep_dots # fix_component() |
|
122
|
|
|
|
|
|
|
tolower # fix_component() |
|
123
|
|
|
|
|
|
|
dir_query # find_directory() |
|
124
|
|
|
|
|
|
|
site_dir # find_directory() |
|
125
|
|
|
|
|
|
|
ignore_existing_files # fix_dups |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
keep_nosuff, type_suff_no_enc, type_suff_fallback, |
|
128
|
|
|
|
|
|
|
type_suff_fallback_no_enc # choose_suffix() |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Summary of the most useful in applications options (with defaults if |
|
131
|
|
|
|
|
|
|
applicable): |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
html_suff # Suffix for HTML (dot will be prepended) |
|
134
|
|
|
|
|
|
|
root => '.', # Where to put files? |
|
135
|
|
|
|
|
|
|
mkpath => 1, # Create directories with chosen names? |
|
136
|
|
|
|
|
|
|
max_length => 255, # Maximal length of a path component |
|
137
|
|
|
|
|
|
|
ignore_existing_files # Should the filename be "new"? |
|
138
|
|
|
|
|
|
|
cache_name => 1, # Return the same filename on the same URL, |
|
139
|
|
|
|
|
|
|
# even if file jumped to existence? |
|
140
|
|
|
|
|
|
|
hierarchical # Only the last component of URL path matters? |
|
141
|
|
|
|
|
|
|
suggested_only_basename => 1, # Should suggested name be relative the path? |
|
142
|
|
|
|
|
|
|
use_query # Do not ignore the query part of URL? |
|
143
|
|
|
|
|
|
|
# Value is used as (literal) prefix of query |
|
144
|
|
|
|
|
|
|
dir_query # Make the non-query part of URL a directory? |
|
145
|
|
|
|
|
|
|
site_dir # Put the hostname part of URL into directory? |
|
146
|
|
|
|
|
|
|
keepsuff_same_mediatype # Preserve the file extensions matching type? |
|
147
|
|
|
|
|
|
|
8+3 # Is the filesystem DOSish? |
|
148
|
|
|
|
|
|
|
keep_space # Map spaces in URL to spaces in filenames? |
|
149
|
|
|
|
|
|
|
tolower # Translate filenames to lowercase? |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
type_suff, type_suff_no_enc, type_suff_fallback, type_suff_fallback_no_enc, |
|
152
|
|
|
|
|
|
|
keep_suff, keep_nosuff # Hashes indexed by lowercased types; |
|
153
|
|
|
|
|
|
|
# Allow tuning choosing the suffix |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=cut |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
my $pr = '([?*|\"<>\\\\:?#\x00-\x1F\x7F-\xFF\\[\\]])'; |
|
158
|
|
|
|
|
|
|
my $defaults = { |
|
159
|
|
|
|
|
|
|
protect => eval("qr/$pr/") || $pr, |
|
160
|
|
|
|
|
|
|
protect_pref => '@', |
|
161
|
|
|
|
|
|
|
root => '.', |
|
162
|
|
|
|
|
|
|
dir_mode => 0775, |
|
163
|
|
|
|
|
|
|
mkpath => 1, |
|
164
|
|
|
|
|
|
|
keep_suff => {map(($_ => 1), |
|
165
|
|
|
|
|
|
|
qw( text/plain |
|
166
|
|
|
|
|
|
|
application/octet-stream application/download ))}, |
|
167
|
|
|
|
|
|
|
max_suff_len => 4, # 'jpeg' |
|
168
|
|
|
|
|
|
|
keepsuff_same_mediatype => 1, |
|
169
|
|
|
|
|
|
|
short_suffices => {qw( jpeg jpg tar.bz2 tbz html htm |
|
170
|
|
|
|
|
|
|
tar.gz tgz )}, # used if '8+3' is true |
|
171
|
|
|
|
|
|
|
suggest_disposition => 1, |
|
172
|
|
|
|
|
|
|
suggested_only_basename => 1, |
|
173
|
|
|
|
|
|
|
fix_url_backslashes => 1, |
|
174
|
|
|
|
|
|
|
max_length => 255, |
|
175
|
|
|
|
|
|
|
cache_name => 1, # name_found() |
|
176
|
|
|
|
|
|
|
queryless_types => # url_takes_query() |
|
177
|
|
|
|
|
|
|
{ map(($_ => 1), # http://filext.com/detaillist.php?extdetail=DJV 2005/01 |
|
178
|
|
|
|
|
|
|
qw(image/djvu image/x-djvu image/dejavu image/x-dejavu |
|
179
|
|
|
|
|
|
|
image/vnd.djvw image/djvw image/x.djvu))}, |
|
180
|
|
|
|
|
|
|
queryless_ext => { 'djvu' => 1, 'djv' => 1 }, # url_takes_query() |
|
181
|
|
|
|
|
|
|
}; |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
my %default_suff = ( |
|
184
|
|
|
|
|
|
|
'text/ftp-dir-listing' => '.dirl', |
|
185
|
|
|
|
|
|
|
); |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub new { |
|
188
|
21
|
|
|
21
|
1
|
18904
|
my $class = shift; |
|
189
|
21
|
|
|
|
|
386
|
my $self = bless {%$defaults, @_}, $class; |
|
190
|
21
|
|
50
|
|
|
164
|
$self->{type_suff} ||= {}; |
|
191
|
21
|
50
|
|
|
|
164
|
$self->{type_suff}{'text/html'} = ".$self->{html_suff}" |
|
192
|
|
|
|
|
|
|
if defined $self->{html_suff}; |
|
193
|
21
|
|
|
|
|
50
|
$self->{type_suff} = {%default_suff, %{$self->{type_suff}}}; |
|
|
21
|
|
|
|
|
82
|
|
|
194
|
21
|
|
|
|
|
75
|
$self; |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item find_name_by_url($url, $suggested_name, $type, $enc) |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
This method returns a suitable filename for the resource given its URL. |
|
200
|
|
|
|
|
|
|
Optional arguments are a suggested name (possibly, it will be modified |
|
201
|
|
|
|
|
|
|
according to options of the object), the content-type, and the |
|
202
|
|
|
|
|
|
|
content-encoding of the resource. If multiple content-encodings are |
|
203
|
|
|
|
|
|
|
required, specify them as an array reference. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
A chain of helper methods (L<"Transformation chain">) is called to |
|
206
|
|
|
|
|
|
|
apply certain transformations to the name. C is returned if |
|
207
|
|
|
|
|
|
|
any of the helper methods (except known_names() and protect_query()) |
|
208
|
|
|
|
|
|
|
return undefined values; the caller is free to interpret this as "load |
|
209
|
|
|
|
|
|
|
to memory", if appropriate. These helper methods are listed in the |
|
210
|
|
|
|
|
|
|
following section. |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=cut |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub find_name_by_url { |
|
215
|
39
|
|
|
39
|
1
|
11087
|
my ($self, $url, $suggested, $type, $enc) = @_; |
|
216
|
|
|
|
|
|
|
|
|
217
|
39
|
50
|
|
|
|
109
|
defined($url = $self->url_2resource($url, $type, $enc)) or return; |
|
218
|
|
|
|
|
|
|
|
|
219
|
39
|
|
|
|
|
116
|
my $f = $self->known_names($url, $suggested, $type, $enc); |
|
220
|
39
|
100
|
|
|
|
132
|
return $f if defined $f; |
|
221
|
|
|
|
|
|
|
|
|
222
|
31
|
50
|
|
|
|
92
|
($f, my $q) = $self->raw_name($url, $suggested, $type, $enc) |
|
223
|
|
|
|
|
|
|
or return; |
|
224
|
|
|
|
|
|
|
|
|
225
|
31
|
|
|
|
|
1121
|
$f = $self->protect_characters($f, $q, $url, $suggested, $type, $enc); |
|
226
|
31
|
50
|
|
|
|
88
|
return unless defined $f; |
|
227
|
|
|
|
|
|
|
|
|
228
|
31
|
|
|
|
|
82
|
$q = $self->protect_query($f, $q, $url, $suggested, $type, $enc); |
|
229
|
|
|
|
|
|
|
|
|
230
|
31
|
50
|
|
|
|
98
|
(my $dirname, $f, $q) |
|
231
|
|
|
|
|
|
|
= $self->find_directory($f, $q, $url, $suggested, $type, $enc) |
|
232
|
|
|
|
|
|
|
or return; |
|
233
|
|
|
|
|
|
|
|
|
234
|
31
|
|
|
|
|
111
|
$dirname = $self->protect_directory($dirname, $f, $q, $url, $suggested, $type, $enc); |
|
235
|
31
|
50
|
|
|
|
110
|
return unless defined $dirname; |
|
236
|
|
|
|
|
|
|
|
|
237
|
31
|
|
|
|
|
77
|
$dirname = $self->directory_found($dirname, $f, $q, $url, $suggested, $type, $enc); |
|
238
|
31
|
50
|
|
|
|
81
|
return unless defined $dirname; |
|
239
|
|
|
|
|
|
|
|
|
240
|
31
|
50
|
|
|
|
226
|
($f, my $suff) = |
|
241
|
|
|
|
|
|
|
$self->split_suffix($f, $dirname, $q, $url, $suggested, $type, $enc) |
|
242
|
|
|
|
|
|
|
or return; |
|
243
|
31
|
50
|
|
|
|
115
|
($f, $suff) = $self->choose_suffix($f, $suff, $dirname, $q, $url, |
|
244
|
|
|
|
|
|
|
$suggested, $type, $enc) |
|
245
|
|
|
|
|
|
|
or return; |
|
246
|
31
|
50
|
|
|
|
107
|
($f, $suff) = |
|
247
|
|
|
|
|
|
|
$self->fix_basename($f, $dirname, $suff, $url, $suggested, $type, $enc) |
|
248
|
|
|
|
|
|
|
or return; |
|
249
|
31
|
|
|
|
|
98
|
$f = $self->fix_dups($f, $dirname, $suff, $url, $suggested, $type, $enc); |
|
250
|
31
|
50
|
|
|
|
74
|
return unless defined $f; |
|
251
|
31
|
|
|
|
|
105
|
return $self->name_found($url, $f, $dirname, $suff, $suggested, $type, $enc); |
|
252
|
|
|
|
|
|
|
} |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=item find_name_by_response($response [, $content_type]) |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
This method returns name given an LWP response object (and, |
|
257
|
|
|
|
|
|
|
optionally, an overriding C). If option |
|
258
|
|
|
|
|
|
|
C is TRUE, uses the header C |
|
259
|
|
|
|
|
|
|
from the response as the suggested name, then passes the fields from |
|
260
|
|
|
|
|
|
|
the response object to the method find_name_by_url(). |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=cut |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
sub find_name_by_response { |
|
265
|
0
|
|
|
0
|
1
|
0
|
my ($self, $res, $ct) = (shift, shift, shift); |
|
266
|
0
|
0
|
|
|
|
0
|
$ct = $res->content_type unless defined $ct; |
|
267
|
|
|
|
|
|
|
# "Content-Disposition" header is defined by RFC1806; supported by Netscape |
|
268
|
0
|
|
0
|
|
|
0
|
my $cd = $self->{suggest_disposition} && $res->header("Content-Disposition"); |
|
269
|
0
|
|
|
|
|
0
|
my $suggested; |
|
270
|
0
|
0
|
0
|
|
|
0
|
if ($cd && $cd =~ /\bfilename\s*=\s*(\S+)/) { |
|
271
|
0
|
|
|
|
|
0
|
$suggested = $1; |
|
272
|
0
|
|
|
|
|
0
|
$suggested =~ s/;$//; |
|
273
|
0
|
|
|
|
|
0
|
$suggested =~ s/^([\"\'])(.*)\1$/$2/; |
|
274
|
0
|
0
|
|
|
|
0
|
$suggested =~ s,.*[\\/],, if $self->{suggested_only_basename}; |
|
275
|
|
|
|
|
|
|
} |
|
276
|
0
|
|
|
|
|
0
|
$self->find_name_by_url($res->request->url, $suggested, |
|
277
|
|
|
|
|
|
|
$ct, $res->content_encoding); |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=back |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=head2 Transformation chain |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=over |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item url_2resource($url [, $type, $encoding]) |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
This method returns $url modified by removing the parts related to |
|
289
|
|
|
|
|
|
|
access to I of the resource. In particular, the I part is |
|
290
|
|
|
|
|
|
|
removed, as well as the I part if url_is_queryless() returns TRUE. |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub url_2resource { |
|
295
|
39
|
|
|
39
|
1
|
64
|
my ($self, $url, $type, $enc) = @_; |
|
296
|
|
|
|
|
|
|
|
|
297
|
39
|
50
|
|
|
|
194
|
$url = url($url) unless ref($url); |
|
298
|
39
|
|
|
|
|
19240
|
my $cpy; |
|
299
|
39
|
100
|
|
|
|
368
|
if (defined $url->frag) { |
|
300
|
10
|
|
|
|
|
344
|
$cpy = $url = $url->clone; |
|
301
|
10
|
|
|
|
|
159
|
$url->frag(undef); |
|
302
|
|
|
|
|
|
|
} |
|
303
|
39
|
50
|
33
|
|
|
1052
|
if (defined $url->equery and $self->url_takes_query($url, $type, $enc)) { |
|
304
|
0
|
0
|
|
|
|
0
|
$url = $url->clone unless $cpy; |
|
305
|
0
|
|
|
|
|
0
|
$url->query(undef); |
|
306
|
|
|
|
|
|
|
} |
|
307
|
|
|
|
|
|
|
$url |
|
308
|
39
|
|
|
|
|
155
|
} |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=item known_names($url, $suggested, $type, $enc) |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
The method find_name_by_url() will return the return value of this |
|
313
|
|
|
|
|
|
|
method (unless L) immediately. Unless overriden, this method |
|
314
|
|
|
|
|
|
|
returns the value of the hash option C indexed by the |
|
315
|
|
|
|
|
|
|
$url. (By default this hash is empty.) |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
If the option C is true, it is a fatal error if $url is |
|
318
|
|
|
|
|
|
|
not a key of this hash. |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=cut |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub known_names { |
|
323
|
39
|
|
|
39
|
1
|
54
|
my ($self, $url) = @_; |
|
324
|
39
|
|
|
|
|
221
|
my $f = $self->{known_names}{$url}; |
|
325
|
39
|
100
|
|
|
|
508
|
return $f if defined $f; |
|
326
|
|
|
|
|
|
|
|
|
327
|
0
|
|
|
|
|
0
|
die "URL with unknown name `$url'" |
|
328
|
31
|
50
|
33
|
|
|
95
|
if $self->{only_known} and keys %{$self->{known_names}}; |
|
329
|
31
|
|
|
|
|
53
|
return; |
|
330
|
|
|
|
|
|
|
} |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=item raw_name($url, $suggested, $type, $enc) |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Returns the 0th approximation to the filename of the resource; the |
|
335
|
|
|
|
|
|
|
return value has two parts: the principal part, and the query string |
|
336
|
|
|
|
|
|
|
(C if should not be used). |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
If $suggested is undefined, returns the path part of the $url, and the |
|
339
|
|
|
|
|
|
|
query part, if present and if option C is TRUE). Otherwise |
|
340
|
|
|
|
|
|
|
either returns $suggested, or (if options C |
|
341
|
|
|
|
|
|
|
and C are both true), returns the I part of the |
|
342
|
|
|
|
|
|
|
$url with the last component changed to $suggested; the query part is |
|
343
|
|
|
|
|
|
|
ignored in this case. In the latter case, if option C is TRUE, only the last path component of $suggested is used. |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=cut |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
sub raw_name { |
|
348
|
31
|
|
|
31
|
1
|
46
|
my ($self, $url, $suggested) = @_; |
|
349
|
31
|
100
|
|
|
|
69
|
if (defined $suggested) { |
|
350
|
4
|
100
|
100
|
|
|
29
|
if ($self->{suggested_only_basename} and $self->{hierarchical}) { |
|
351
|
1
|
|
|
|
|
7
|
my @p = $url->path_segments; |
|
352
|
1
|
50
|
|
|
|
47
|
$suggested =~ s,.*/,, if $self->{suggested_basename}; |
|
353
|
1
|
|
|
|
|
9
|
return join '/', @p[0..$#p-1], $suggested; |
|
354
|
|
|
|
|
|
|
} |
|
355
|
3
|
|
|
|
|
14
|
return $suggested; |
|
356
|
|
|
|
|
|
|
} else { |
|
357
|
27
|
100
|
|
|
|
82
|
my $q = $self->{use_query} ? $url->equery : undef; |
|
358
|
27
|
|
|
|
|
518
|
return ($url->path, $q); |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=item protect_characters($f, $query, $url, $suggested, $type, $enc) |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Returns the filename $f with necessary character-by-character |
|
365
|
|
|
|
|
|
|
translations performed. Unless overriden, it translates backslashes |
|
366
|
|
|
|
|
|
|
to slashes if the option C is TRUE, replaces |
|
367
|
|
|
|
|
|
|
characters matched by regular expression in the option C by |
|
368
|
|
|
|
|
|
|
their hexadecimal representation (with the leader being the value of |
|
369
|
|
|
|
|
|
|
the option C), and replaces percent signs by the value |
|
370
|
|
|
|
|
|
|
of the option C. |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub protect_characters { |
|
375
|
31
|
|
|
31
|
1
|
47
|
my ($self, $f) = @_; |
|
376
|
31
|
50
|
|
|
|
137
|
$f =~ s,\\,/,g if $self->{fix_url_backslashes}; |
|
377
|
|
|
|
|
|
|
# Protect against funny characters, some filesystems can bark on them |
|
378
|
31
|
|
|
|
|
1091
|
$f =~ s($self->{protect}) |
|
379
|
0
|
|
|
|
|
0
|
( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge; |
|
380
|
31
|
50
|
|
|
|
110
|
$f =~ s(%)($self->{protect_pref})g if $self->{protect_pref} ne '%'; |
|
381
|
31
|
|
|
|
|
67
|
$f |
|
382
|
|
|
|
|
|
|
} |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item protect_query($f, $query, $url, $suggested, $type, $enc) |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
Returns $query with necessary character-by-character translations |
|
387
|
|
|
|
|
|
|
performed. Unless overriden, it translates slashes, backslashes, and |
|
388
|
|
|
|
|
|
|
characters matched byregular expression in the option C by |
|
389
|
|
|
|
|
|
|
their hexadecimal representation (with the leader being the value of |
|
390
|
|
|
|
|
|
|
the option C), and replaces percent signs by the value |
|
391
|
|
|
|
|
|
|
of the option C. |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=cut |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub protect_query { |
|
396
|
31
|
|
|
31
|
1
|
76
|
my ($self, $f, $q) = @_; |
|
397
|
31
|
100
|
|
|
|
92
|
return unless defined $q; |
|
398
|
|
|
|
|
|
|
# Protect against funny characters, some filesystems can bark on them |
|
399
|
7
|
|
|
|
|
45
|
$q =~ s($self->{protect}) |
|
400
|
49
|
|
|
|
|
232
|
( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge; |
|
401
|
7
|
50
|
|
|
|
388
|
$q =~ s(%)($self->{protect_pref})g if $self->{protect_pref} ne '%'; |
|
402
|
7
|
|
|
|
|
31
|
$q =~ s(([/\\])) |
|
403
|
0
|
|
|
|
|
0
|
( sprintf '%s%02X', $self->{protect_pref}, ord $1 )ge; |
|
404
|
7
|
|
|
|
|
22
|
$q |
|
405
|
|
|
|
|
|
|
} |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item find_directory($f, $query, $url, $suggested, $type, $enc) |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Returns a triple of the appropriate directory name, the relative |
|
410
|
|
|
|
|
|
|
filename, and a string to append to the filename, based on |
|
411
|
|
|
|
|
|
|
processed-so-far filename $f and the $query string. |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Unless overriden, does the following: unless the option |
|
414
|
|
|
|
|
|
|
C is TRUE, all but the last path components of $f are |
|
415
|
|
|
|
|
|
|
ignored. If the option C is TRUE, the host part of the URL |
|
416
|
|
|
|
|
|
|
(as well as the port part - if non-standard) are prepended to the |
|
417
|
|
|
|
|
|
|
filename. The leading backslash is always stripped, and the option |
|
418
|
|
|
|
|
|
|
C is used as the lead components of the directory name. If |
|
419
|
|
|
|
|
|
|
$query is defined, and the option C is true, $f is used as |
|
420
|
|
|
|
|
|
|
the last component of the directory, and $query as file name (with |
|
421
|
|
|
|
|
|
|
option C prepended). |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
(Dirname is assumed to be C>-terminated.) |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=cut |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
sub find_directory { |
|
428
|
31
|
|
|
31
|
1
|
58
|
my ($self, $f, $q, $url) = @_; |
|
429
|
|
|
|
|
|
|
# trim path until only the basename is left |
|
430
|
31
|
|
|
|
|
138
|
$f =~ s|(.*/)||; |
|
431
|
31
|
100
|
66
|
|
|
201
|
my $dirname = ($self->{hierarchical} and $1) ? $1 : ''; |
|
432
|
31
|
|
|
|
|
102
|
$dirname =~ s#^/##; |
|
433
|
|
|
|
|
|
|
|
|
434
|
31
|
100
|
|
|
|
70
|
if (defined $q) { |
|
435
|
7
|
|
|
|
|
21
|
$q = "$self->{use_query}$q"; |
|
436
|
7
|
100
|
|
|
|
22
|
if ($self->{dir_query}) { |
|
437
|
6
|
|
|
|
|
14
|
$dirname = "$dirname$f/"; # XXXX If it already exists as a file? |
|
438
|
6
|
|
|
|
|
10
|
$f = $q; |
|
439
|
6
|
|
|
|
|
12
|
$q = ''; |
|
440
|
|
|
|
|
|
|
} |
|
441
|
|
|
|
|
|
|
} else { |
|
442
|
24
|
|
|
|
|
38
|
$q = ''; |
|
443
|
|
|
|
|
|
|
} |
|
444
|
|
|
|
|
|
|
|
|
445
|
31
|
100
|
|
|
|
72
|
if ($self->{site_dir}) { |
|
446
|
9
|
|
|
|
|
16
|
eval { |
|
447
|
9
|
|
|
|
|
53
|
my $site = lc $url->host; |
|
448
|
9
|
|
|
|
|
290
|
my $port = $url->port; |
|
449
|
9
|
|
|
|
|
424
|
my $def = $url->default_port; |
|
450
|
9
|
50
|
|
|
|
94
|
$port = '' if $port == $def; |
|
451
|
9
|
50
|
|
|
|
20
|
$site .= "=port$port" if length $port; |
|
452
|
9
|
|
|
|
|
35
|
$dirname = "$self->{root}/$site/$dirname"; |
|
453
|
|
|
|
|
|
|
}; |
|
454
|
|
|
|
|
|
|
} else { |
|
455
|
22
|
|
|
|
|
62
|
$dirname = "$self->{root}/$dirname"; |
|
456
|
|
|
|
|
|
|
} |
|
457
|
31
|
|
|
|
|
157
|
($dirname, $f, $q) |
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=item protect_directory($dirname, $f, $append, $url, $suggested, $type, $enc) |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
Returns the provisional directory part of the filename. Unless |
|
463
|
|
|
|
|
|
|
overriden, replaces empty components by the string C preceeded |
|
464
|
|
|
|
|
|
|
by the value of C option; then applies the method |
|
465
|
|
|
|
|
|
|
fix_component() to each component of the directory. |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=cut |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub protect_directory { |
|
470
|
31
|
|
|
31
|
1
|
50
|
my ($self, $dirname) = @_; |
|
471
|
31
|
|
|
|
|
116
|
$dirname =~ s,/(?=/),/$self->{protect_pref}empty,g; # empty components |
|
472
|
31
|
|
|
|
|
163
|
return join '/', map($self->fix_component($_,1), split m|/|, $dirname), ''; |
|
473
|
|
|
|
|
|
|
} |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=item directory_found($dirname, $f, $append, $url, $suggested, $type, $enc) |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
A callback to process the calculated directory name. Unless |
|
478
|
|
|
|
|
|
|
overriden, it creates the directory (with permissions per option |
|
479
|
|
|
|
|
|
|
C) if the option C is TRUE. |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
Actually, the directory name is the return value, so this is the last |
|
482
|
|
|
|
|
|
|
chance to change the directory name... |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=cut |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub directory_found { |
|
487
|
31
|
|
|
31
|
1
|
101
|
my ($self, $dirname) = @_; |
|
488
|
31
|
100
|
66
|
|
|
6371
|
mkpath $dirname, $self->{verbose}, $self->{dir_mode} |
|
|
|
|
100
|
|
|
|
|
|
489
|
|
|
|
|
|
|
if $self->{mkpath} and length $dirname and not -d $dirname; |
|
490
|
31
|
|
|
|
|
82
|
$dirname; |
|
491
|
|
|
|
|
|
|
} |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
# Copied from LWP::Mediatypes v1.32 |
|
494
|
|
|
|
|
|
|
my %suffixEncoding = ( |
|
495
|
|
|
|
|
|
|
'Z' => 'compress', |
|
496
|
|
|
|
|
|
|
'gz' => 'gzip', |
|
497
|
|
|
|
|
|
|
'hqx' => 'x-hqx', |
|
498
|
|
|
|
|
|
|
'uu' => 'x-uuencode', |
|
499
|
|
|
|
|
|
|
'z' => 'x-pack', |
|
500
|
|
|
|
|
|
|
'bz2' => 'x-bzip2', |
|
501
|
|
|
|
|
|
|
); |
|
502
|
|
|
|
|
|
|
my %suffixDecoding = reverse %suffixEncoding; |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=item split_suffix($f, $dirname, $append, $url, $suggested, $type, $enc) |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
Breaks the last component $f of the filename into a pair of basename |
|
507
|
|
|
|
|
|
|
and suffix, which are returned. $dirname consists of other components |
|
508
|
|
|
|
|
|
|
of the filename, $append is the string to append to the basename in |
|
509
|
|
|
|
|
|
|
the future. |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Suffix may be empty, and is supposed to contain the leading dot (if |
|
512
|
|
|
|
|
|
|
applicable); it may contain more than one dot. Unless overriden, the |
|
513
|
|
|
|
|
|
|
suffix consists of all trailing non-empty started-by-dot groups with |
|
514
|
|
|
|
|
|
|
length no more than given by the option C (not including |
|
515
|
|
|
|
|
|
|
the leading dot). |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub split_suffix { |
|
520
|
49
|
|
|
49
|
1
|
107
|
my ($self, $f, $dirname, $append, $url, $suggested, $type, $enc) = @_; |
|
521
|
|
|
|
|
|
|
|
|
522
|
49
|
|
|
|
|
50
|
my $suff; |
|
523
|
|
|
|
|
|
|
|
|
524
|
49
|
|
|
|
|
76
|
my $max = $self->{max_suff_len}; |
|
525
|
49
|
|
|
|
|
1385
|
(my $base = $f) =~ s<((?:\.[^/]{1,$max})*)$><>; |
|
526
|
49
|
|
|
|
|
288
|
return ($base, "$1"); |
|
527
|
|
|
|
|
|
|
} |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=item choose_suffix($f, $suff, $dirname, $append, $url, $suggested, $type, $enc) |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Returns a pair of basename and appropriate suffix for a file. $f is |
|
533
|
|
|
|
|
|
|
the basename of the file, $suff is its suffix, $dirname consists of |
|
534
|
|
|
|
|
|
|
other components of file names, $append is the string to append to the |
|
535
|
|
|
|
|
|
|
basename. |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Different strategies applicable to this problem are: |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=over |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=item * |
|
542
|
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
keep the file extension; |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=item * |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
replace by the "best" extension for this $type (and $enc); |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=item * |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
replace by the user-specified type-specific extension. |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=back |
|
554
|
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Any of these has two variants: whether we want the encodings reflected |
|
556
|
|
|
|
|
|
|
in the suffix, or not. Unless overriden, chosing strategy/variant |
|
557
|
|
|
|
|
|
|
consists of several rounds. |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
In the first round, choose user-specified suffix if $type is defined, |
|
560
|
|
|
|
|
|
|
and is (lowercased) in the option-hashes C and |
|
561
|
|
|
|
|
|
|
C (choosing the variant based on which hash |
|
562
|
|
|
|
|
|
|
matched). Keep the current suffix if $type is not defined, or option |
|
563
|
|
|
|
|
|
|
C is TRUE and the current suffix of the file |
|
564
|
|
|
|
|
|
|
matches $type and $enc (per database of known types and encodings). |
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
The second round runs if none of these was applicable. Choose |
|
567
|
|
|
|
|
|
|
user-specified suffix if $type is (lowercased) in the hashes |
|
568
|
|
|
|
|
|
|
C or C (choosing |
|
569
|
|
|
|
|
|
|
variant as above); keep the current suffix if the type (lowercased) is |
|
570
|
|
|
|
|
|
|
in the hashes C or C (depending on whether |
|
571
|
|
|
|
|
|
|
$suff is empty or not). |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
If none of these was applicable, the last round chooses the |
|
574
|
|
|
|
|
|
|
appropriate suffix by the database of known types and encodings; if |
|
575
|
|
|
|
|
|
|
not found, the existing suffix is preserved. |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub choose_suffix { |
|
580
|
31
|
|
|
31
|
1
|
72
|
my ($self, $f, $suff, $dirname, $append, $url, $suggested, $type, $enc) = @_; |
|
581
|
|
|
|
|
|
|
|
|
582
|
31
|
|
|
|
|
47
|
my ($guess_suffix, $check_enc); |
|
583
|
31
|
50
|
|
|
|
94
|
$enc = [] unless defined $enc; |
|
584
|
31
|
50
|
|
|
|
95
|
$enc = [$enc] unless ref $enc; |
|
585
|
31
|
100
|
|
|
|
160
|
if (not defined $type) { # Do nothing |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff}{lc $type}) { |
|
587
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff}{lc $type}; |
|
588
|
0
|
|
|
|
|
0
|
$check_enc = $enc; |
|
589
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff_no_enc}{lc $type}) { |
|
590
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff}{lc $type}; |
|
591
|
|
|
|
|
|
|
} elsif ($self->{keepsuff_same_mediatype}) { |
|
592
|
18
|
|
|
|
|
74
|
my($t, @enc) = guess_media_type($f); |
|
593
|
18
|
50
|
33
|
|
|
1528
|
$guess_suffix = 1 |
|
|
|
|
33
|
|
|
|
|
|
594
|
|
|
|
|
|
|
unless defined $t and lc $t eq lc $type and lc "@enc" eq lc "@$enc"; |
|
595
|
|
|
|
|
|
|
} else { |
|
596
|
0
|
|
|
|
|
0
|
$guess_suffix = 1; |
|
597
|
|
|
|
|
|
|
} |
|
598
|
|
|
|
|
|
|
|
|
599
|
31
|
100
|
|
|
|
171
|
if (not $guess_suffix) { # No substitution |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff_fallback}{lc $type}) { |
|
601
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff_fallback}{lc $type}; |
|
602
|
0
|
|
|
|
|
0
|
$check_enc = $enc; |
|
603
|
|
|
|
|
|
|
} elsif (exists $self->{type_suff_fallback_no_enc}{lc $type}) { |
|
604
|
0
|
|
|
|
|
0
|
$suff = $self->{type_suff_fallback}{lc $type}; |
|
605
|
|
|
|
|
|
|
} elsif ((length $suff) |
|
606
|
|
|
|
|
|
|
? $self->{keep_suff}{lc $type} |
|
607
|
|
|
|
|
|
|
: $self->{keep_nosuff}{lc $type}) { # No substitution |
|
608
|
|
|
|
|
|
|
} else { |
|
609
|
18
|
|
|
|
|
57
|
my $s = media_suffix($type); |
|
610
|
18
|
50
|
33
|
|
|
227
|
if (defined $s and length $s) { # Known media type... |
|
611
|
18
|
|
|
|
|
27
|
$suff = ".$s"; |
|
612
|
18
|
|
|
|
|
29
|
$check_enc = $enc; |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
} |
|
615
|
|
|
|
|
|
|
|
|
616
|
31
|
100
|
|
|
|
68
|
if ($check_enc) { |
|
617
|
18
|
|
|
|
|
38
|
for my $e (@$enc) { |
|
618
|
0
|
0
|
|
|
|
0
|
$suff .= $suffixDecoding{$e} if exists $suffixDecoding{$e}; |
|
619
|
|
|
|
|
|
|
} |
|
620
|
|
|
|
|
|
|
} |
|
621
|
|
|
|
|
|
|
|
|
622
|
31
|
|
|
|
|
171
|
return ("$f$append", $suff); |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
=item fix_basename($f, $dirname, $suff, $url, $suggested, $type, $enc) |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
Returns a pair of basename and suffix for a file. $f is the last |
|
628
|
|
|
|
|
|
|
component of the name of the file, $dirname consists of other |
|
629
|
|
|
|
|
|
|
components. Unless overriden, this method replaces an empty basename |
|
630
|
|
|
|
|
|
|
by C<"index"> and applies fix_component() method to the basename; |
|
631
|
|
|
|
|
|
|
finally, if C<'8+3'> otion is set, it converts the filename and suffix |
|
632
|
|
|
|
|
|
|
to a name suitable 8+3 filesystems. |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=cut |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
sub fix_basename { |
|
637
|
31
|
|
|
31
|
1
|
66
|
my ($self, $f, $dirname, $suffix) = @_; |
|
638
|
|
|
|
|
|
|
|
|
639
|
31
|
50
|
|
|
|
99
|
$f = "index" unless length $f; |
|
640
|
31
|
|
|
|
|
67
|
$f = $self->fix_component($f,0); # Length ignores extension... |
|
641
|
31
|
100
|
|
|
|
151
|
($f, $suffix) = $self->eight_plus_three($f, $suffix) if $self->{'8+3'}; |
|
642
|
31
|
|
|
|
|
130
|
return ($f, $suffix); |
|
643
|
|
|
|
|
|
|
} |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=item fix_dups($f, $dirname, $suff, $url, $suggested, $type, $enc) |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
Given a basename, extension, and the directory part of the filename, |
|
648
|
|
|
|
|
|
|
modifies the basename (if needed) to avoid duplicates; should return |
|
649
|
|
|
|
|
|
|
the complete file name (combining the dirname, basename, and suffix). |
|
650
|
|
|
|
|
|
|
Unless overriden, appends a number to the basename (shortening |
|
651
|
|
|
|
|
|
|
basename if needed) so that the result is unique. |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
This is a prime candidate for overriding (e.g., to ask user for |
|
654
|
|
|
|
|
|
|
confirmation of overwrite). |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=cut |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
sub fix_dups { |
|
659
|
31
|
|
|
31
|
1
|
65
|
my ($self, $f, $dirname, $suff) = @_; |
|
660
|
|
|
|
|
|
|
|
|
661
|
31
|
100
|
|
|
|
75
|
return "$dirname$f$suff" if $self->{ignore_existing_files}; |
|
662
|
30
|
|
|
|
|
48
|
my $max_length = $self->{max_length}; |
|
663
|
30
|
|
|
|
|
48
|
my $extra = ""; # something to make the name unique |
|
664
|
30
|
100
|
|
|
|
71
|
$max_length = 8 + length $suff if $self->{'8+3'}; |
|
665
|
30
|
|
|
|
|
187
|
while (1) { |
|
666
|
|
|
|
|
|
|
# Construct a new file name; give up shortening if suffix is too long... |
|
667
|
32
|
100
|
66
|
|
|
194
|
if ( $max_length and length "$f$extra$suff" > $max_length |
|
|
|
|
66
|
|
|
|
|
|
668
|
|
|
|
|
|
|
and length "$extra$suff" < $max_length ) { |
|
669
|
4
|
|
|
|
|
14
|
$f = substr $f, 0, $max_length - length "$extra$suff"; |
|
670
|
|
|
|
|
|
|
} |
|
671
|
32
|
|
|
|
|
64
|
my $file = $dirname . $f . $extra . $suff; |
|
672
|
|
|
|
|
|
|
# Check if it is unique |
|
673
|
32
|
100
|
|
|
|
887
|
return $file unless -e $file; |
|
674
|
|
|
|
|
|
|
|
|
675
|
2
|
50
|
|
|
|
13
|
$extra = "000" unless $extra; # Try appending a number |
|
676
|
2
|
|
|
|
|
5
|
$extra++; |
|
677
|
|
|
|
|
|
|
} |
|
678
|
|
|
|
|
|
|
} |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=item name_found($url, $f, $dirname, $suff, $suggested, $type, $enc) |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
The callback method to register the found name. Unless overridden, |
|
683
|
|
|
|
|
|
|
behaves like following: if option C is TRUE, stores the |
|
684
|
|
|
|
|
|
|
found name in the C hash. Otherwise just returns the found name. |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=cut |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
sub name_found { |
|
689
|
31
|
|
|
31
|
1
|
75
|
my ($self, $url, $f, $dirname, $suff, $suggested, $type, $enc) = @_; |
|
690
|
|
|
|
|
|
|
|
|
691
|
31
|
100
|
|
|
|
108
|
return $f unless $self->{cache_name}; |
|
692
|
28
|
|
|
|
|
175
|
return $self->{known_names}{$url} = $f; |
|
693
|
|
|
|
|
|
|
} |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
=back |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=head2 Helper methods |
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=over |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=item fix_component($component, $isdir) |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Returns a suitably modified value of a path component of a filename. |
|
704
|
|
|
|
|
|
|
The non-overriden method massages unescapes embedded SPACE characters; |
|
705
|
|
|
|
|
|
|
it removes starting/trailing, and converts the rest to C<_> unless the |
|
706
|
|
|
|
|
|
|
option C is TRUE; removes trailing dots unless the option |
|
707
|
|
|
|
|
|
|
C is TRUE; translates to lowercase if the option C |
|
708
|
|
|
|
|
|
|
is TRUE, truncates to C if this option is set, and applies |
|
709
|
|
|
|
|
|
|
the eight_plus_three() method if the option C<'8+3'> is set. |
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=cut |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
sub fix_component { |
|
714
|
160
|
|
|
160
|
1
|
388
|
my ($self, $f, $isdir) = @_; |
|
715
|
|
|
|
|
|
|
|
|
716
|
160
|
|
|
|
|
259
|
$f =~ s/%20/ /g; # URL-encoded space is %20 |
|
717
|
160
|
|
|
|
|
423
|
$f =~ s/\E$self->{protect_pref}20/ /g; # URL-encoded space is %20 |
|
718
|
160
|
100
|
|
|
|
367
|
unless ($self->{keep_space}) { # Translate spaces in URL to underscores (_) |
|
719
|
151
|
|
|
|
|
434
|
$f =~ s/^ *//; # Remove initial spaces from base |
|
720
|
151
|
|
|
|
|
1066
|
$f =~ s/ *$//; # Remove trailing spaces from base |
|
721
|
|
|
|
|
|
|
|
|
722
|
151
|
|
|
|
|
259
|
$f =~ tr/ /_/; |
|
723
|
|
|
|
|
|
|
} |
|
724
|
160
|
50
|
|
|
|
456
|
$f =~ s/\.+$// unless $self->{keep_dots}; |
|
725
|
160
|
100
|
|
|
|
498
|
$f = lc $f if $self->{tolower}; # Output lower-case |
|
726
|
|
|
|
|
|
|
|
|
727
|
160
|
100
|
66
|
|
|
745
|
substr($f, $self->{max_length}) = '' |
|
728
|
|
|
|
|
|
|
if $self->{max_length} and length $f > $self->{max_length}; |
|
729
|
160
|
100
|
|
|
|
345
|
return join '', $self->eight_plus_three($f) if $self->{'8+3'}; |
|
730
|
142
|
|
|
|
|
605
|
$f; |
|
731
|
|
|
|
|
|
|
} |
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
=item eight_plus_three($fname, $suffix) |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
Returns the value of filename modified for filesystems with 8+3 |
|
736
|
|
|
|
|
|
|
restriction on the filename (such as DOS). If $suffix is not given, |
|
737
|
|
|
|
|
|
|
calculates it from $fname; otherwise $suffix should include the |
|
738
|
|
|
|
|
|
|
leading dot, and $fname should have $suffix already removed. (Some |
|
739
|
|
|
|
|
|
|
parts of info may be moved between suffix and filename if judged |
|
740
|
|
|
|
|
|
|
appropriate.) |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=cut |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
sub eight_plus_three { |
|
745
|
20
|
|
|
20
|
1
|
29
|
my ($self, $f, $suff) = @_; |
|
746
|
|
|
|
|
|
|
|
|
747
|
20
|
100
|
|
|
|
56
|
($f, $suff) = $self->split_suffix($f, undef, '') unless defined $suff; |
|
748
|
|
|
|
|
|
|
# Try to move some info to a suffix even if it becomes too long |
|
749
|
20
|
100
|
100
|
|
|
110
|
$suff = $2 if not length $suff and $f =~ s|(.{8,})\.(.*)$|$1|s ; |
|
750
|
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
# Balance multiple suffices between the parts |
|
752
|
20
|
|
100
|
|
|
300
|
$f .= $1 while length($f) <= 6 and $suff =~ s/^(\..*?)(?=\...)//s; |
|
753
|
|
|
|
|
|
|
|
|
754
|
20
|
100
|
100
|
|
|
124
|
if (not length $suff and length($f) > 8) { # Move part of fname to suff |
|
755
|
6
|
|
|
|
|
10
|
my $l = length($f) - 8; |
|
756
|
6
|
50
|
|
|
|
12
|
$l = 3 if $l > 3; |
|
757
|
6
|
|
|
|
|
12
|
$suff = substr $f, -$l, $l; |
|
758
|
6
|
|
|
|
|
11
|
substr($f, -$l, $l) = ''; |
|
759
|
|
|
|
|
|
|
} |
|
760
|
20
|
|
|
|
|
40
|
$f =~ s/\./_/g; |
|
761
|
20
|
|
|
|
|
69
|
$suff =~ s/^\.//; # Temporary strip the leading dot |
|
762
|
20
|
|
|
|
|
35
|
my $s = $self->{short_suffices}{$suff}; |
|
763
|
20
|
100
|
|
|
|
57
|
($s = $suff) =~ s/\./_/g unless defined $s; |
|
764
|
|
|
|
|
|
|
|
|
765
|
20
|
100
|
|
|
|
45
|
substr($f, 8) = '' if length($f) > 8; |
|
766
|
20
|
100
|
|
|
|
40
|
substr($s, 2, length($s)-3) = '' if length($s) > 3; |
|
767
|
20
|
100
|
|
|
|
47
|
$s = ".$s" if length $s; |
|
768
|
20
|
|
|
|
|
106
|
($f, $s); |
|
769
|
|
|
|
|
|
|
} |
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=item url_takes_query($url [, $type, $encoding]) |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
This method returns TRUE if the I part of the URL is selecting |
|
774
|
|
|
|
|
|
|
a part of the resource (i.e., if it is behaves as a I part, |
|
775
|
|
|
|
|
|
|
and it is the client which should process this part). Such URLs are |
|
776
|
|
|
|
|
|
|
detected by $type (should be in hash option C), or by |
|
777
|
|
|
|
|
|
|
extension of the last path component (should be in hash option |
|
778
|
|
|
|
|
|
|
C). |
|
779
|
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
=back |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=cut |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
sub url_takes_query { |
|
785
|
39
|
|
|
39
|
1
|
1073
|
my ($self, $url, $type) = @_; |
|
786
|
39
|
50
|
66
|
|
|
175
|
return 1 if $type and $self->{queryless_types}{$type}; |
|
787
|
39
|
|
|
|
|
196
|
my @p = $url->path_segments; |
|
788
|
39
|
|
66
|
|
|
3394
|
my ($ext) = (@p and $p[-1] =~ /.*\.(.*)$/); |
|
789
|
39
|
100
|
|
|
|
314
|
$ext and $self->{queryless_ext}{$ext}; |
|
790
|
|
|
|
|
|
|
} |
|
791
|
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
=head1 Net::ChooseFName::Failer class |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
A class which behaves as Net::ChooseFName, but always returns |
|
795
|
|
|
|
|
|
|
C. For convenience, the constructor is duplicated as a class |
|
796
|
|
|
|
|
|
|
method failer() in the class Net::ChooseFName. |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=cut |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
# These always return undef; the caller is free to interpret this "to memory" |
|
801
|
0
|
|
|
0
|
|
0
|
sub Net::ChooseFName::Failer::find_name_by_response {} |
|
802
|
0
|
|
|
0
|
|
0
|
sub Net::ChooseFName::Failer::find_name_by_url {} |
|
803
|
0
|
|
|
0
|
|
0
|
sub Net::ChooseFName::Failer::new {bless [], shift} |
|
804
|
0
|
|
|
0
|
0
|
0
|
sub Net::ChooseFName::failer {bless [], 'Net::ChooseFName::Failer'} |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
sub __Fix_broken_MediaTypes { |
|
807
|
1
|
|
|
1
|
|
5
|
my @s = media_suffix('application/postscript'); |
|
808
|
|
|
|
|
|
|
# warn "Fixing `@s'..."; |
|
809
|
|
|
|
|
|
|
# if ($s[0] eq 'ai' or 1) { # [0] addresses in hash order; meaningless |
|
810
|
|
|
|
|
|
|
# warn "Fixing..."; |
|
811
|
1
|
|
|
|
|
2227
|
@s = ('ps', grep $_ ne 'ps', @s); |
|
812
|
1
|
|
|
|
|
8
|
add_type('application/postscript', @s); |
|
813
|
|
|
|
|
|
|
# } |
|
814
|
|
|
|
|
|
|
} |
|
815
|
|
|
|
|
|
|
__Fix_broken_MediaTypes(); |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
1; |
|
818
|
|
|
|
|
|
|
__END__ |