File Coverage

blib/lib/WWW/Link_Controller/InfoStruc.pm
Criterion Covered Total %
statement 115 121 95.0
branch 51 88 57.9
condition 6 12 50.0
subroutine 11 11 100.0
pod 2 4 50.0
total 185 236 78.3


line stmt bran cond sub pod time code
1             package WWW::Link_Controller::InfoStruc;
2             $REVISION=q$Revision: 1.10 $ ; $VERSION = sprintf ( "%d.%02d", $REVISION =~ /(\d+).(\d+)/ );
3              
4 1     1   608 use Carp qw(carp croak cluck);
  1         1  
  1         73  
5 1     1   5 use strict;
  1         1  
  1         30  
6 1     1   5 use warnings;
  1         2  
  1         26  
7 1     1   1004 use Search::Binary;
  1         27  
  1         2027  
8              
9             our ($no_warn, $verbose);
10              
11             $no_warn = 0 unless defined $no_warn;
12              
13             =head1 NAME
14              
15             WWW::Link_Controller::InfoStruc - read infostructure config.
16              
17             =head1 DESCRIPTION
18              
19             This is a simple module for reading infostructure configuration.
20              
21             =cut
22              
23             $verbose=0 unless defined $verbose;
24              
25             sub default_infostrucs () {
26 2     2 0 86 WWW::Link_Controller::InfoStruc::read_infostruc_file
27             ( $::infostrucs, \%::infostrucs, \@::infostrucs,
28             \@::infostruc_urls_sorted,\@::infostruc_files_sorted );
29             }
30              
31             sub read_infostruc_file ($$$) {
32 2     2 0 3 my $filename=shift;
33 2         3 my $infostruc_hash=shift;
34 2         3 my $infostruc_array=shift;
35 2         4 my $url_array_sorted=shift;
36 2         4 my $file_array_sorted=shift;
37              
38 2         3 my @url_array;
39             my @file_array;
40              
41 2 50       13 die "need an array ref not " . ref $url_array_sorted
42             unless ( ref $url_array_sorted ) =~ m/ARRAY/;
43 2 50       9 die "need an array ref not " . ref $file_array_sorted
44             unless ( ref $url_array_sorted ) =~ m/ARRAY/;
45              
46 2 50       101 open(INFOSTRUCS, $filename) or die "couldn't open config file $filename";
47              
48 2 50       7 print STDERR "Reading cofig file $filename\n"
49             if $verbose & 64;
50              
51 2         48 while (defined(my $conf_line=)) {
52 5 50       17 next if $conf_line =~ m/^\s*(?:\#.*)$/; #comment lines and empty lines
53 5 50       11 print STDERR "conf line $conf_line\n"
54             if $verbose;
55              
56 5 50       15 $conf_line =~ m<^\s*(\S+)\s+(\S+)\s.*\%> and
57             die '% and " are reserved in infostructure config file ' . $filename;
58              
59 5         32 my ($mode, $url, $directory, $junk) =
60             $conf_line =~ m,^\s*(\S+)\s+(\S+) #non optional mode and url
61             (?:\s+(\S+) #directory
62             (?: \s*(.\s+))? #junk
63             )?,x;
64              
65              
66 5 50       14 die "badly formatted line in infostruc conf file\n$conf_line\n"
67             . "too many spaces" if $junk;
68              
69 5         8 my $infostruc;
70              
71             CASE: {
72 5 100       5 $mode =~ m/^(?:www|directory)$/ and do {
  5         21  
73 4         14 $infostruc =
74             {
75             mode => $mode,
76             url_base => $url,
77             file_base => $directory
78             };
79 4         15 $infostruc_hash->{$url} = $infostruc;
80 4         8 last CASE;
81             };
82 1 50       5 $mode eq "advanced" and do {
83 1 50       4 defined $infostruc_hash->{$url} or do {
84 0         0 print STDERR <
85             Found advanced infostructure $url in
86             $filename without definition.
87              
88             You have to define the \$::infostrucs{} definition in your
89             .link-control.pl file for every advanced infostructure. See the LinkController
90             reference manual for details.
91             EOF
92 0         0 die "\$::infostrucs{$url} not defined";
93             };
94              
95 1         3 $infostruc =$infostruc_hash->{$url};
96              
97             #this is good since it minimises duplication of data
98             #so we won't offer a warning normally
99 1 50       5 defined $infostruc->{url_base} or do {
100 1 50       5 warn "url_base defined for $url infostruc; copying hash key"
101             if $verbose;
102 1         12 $infostruc->{url_base}=$url;
103             };
104             #this is not documented yet ... but I think it's
105             #sensible..... it may go away though
106 1 50       4 defined $infostruc->{mode} or do {
107 1 50       4 warn "mode not defined for $url infostruc"
108             unless $no_warn;
109 1         4 $infostruc->{mode}="www";
110 1 50       10 $infostruc->{mode}="directory"
111             if defined $infostruc->{file_base};
112             };
113              
114 1 50 33     28 $infostruc->{mode} eq "directory"
115             and not defined $infostruc->{file_base}
116             and die "file_base not defined for $url infostructure";
117 1         2 last CASE;
118             };
119 0         0 die "unknown mode $mode" ;
120             }
121              
122 5 50       16 $infostruc->{url_base} eq $url
123             or die "base url inconsistency $url / "
124             . $infostruc->{url_base} . "\n";
125              
126              
127 5 50       14 print STDERR "got data for infostructure at $url\n"
128             if $verbose & 64;
129              
130             #fixme; maybe we should use BTREEs rather than doing binary
131             #searches on arrays. This is likely to be a performance critical
132             #piece of code?
133              
134 5         10 push @$infostruc_array, $url;
135 5         12 push @url_array, [ $url, $infostruc ];
136 5 100       43 push @file_array, [ $infostruc->{file_base}, $infostruc ]
137             if defined $infostruc->{file_base};
138             }
139              
140 2         10 @$url_array_sorted=sort {$b->[0] cmp $a->[0]} @url_array;
  4         11  
141 2         9 @$file_array_sorted=sort {$b->[0] cmp $a->[0]} @file_array;
  3         11  
142              
143             }
144              
145              
146             =head2 url_to_file
147              
148             C takes a url as an argument. It then does a binary
149             search on a reverse sorted array of url bases. It uses infostructure
150             definitions to convert the URL into a filename.
151              
152             This function should untaint it's result because it should use the
153             infostructure definitions to ensure that we only do things to files we
154             are supposed to do things to.
155              
156             =cut
157              
158             # Given any url, get us something we can edit in order to change the
159             # resource referenced by that url. Or not, if we can't. In the case
160             # that we can't, return undef.
161              
162              
163             # N.B. This will accept any filename which is within the infostructure
164             # whatsoever.. it is possible that that includes more than you wish to
165             # let people edit.
166              
167              
168             sub _find_best_match ($$) {
169 4     4   7 my $array=shift;
170 4         5 my $key=shift;
171              
172 4 50 33     32 cluck "usage _find_best_match(, )"
173             unless ref ($array) =~ m/ARRAY/ and defined $key;
174              
175             # we want the longest match.. so we look through the list.. if we
176             # are put at something equal to us then we are fine. If it's not
177             # equal to us, then the value we point at will be greater than our
178             # url. Because we use reverse sorting then greater means shorter,
179             # so it will be okay to.
180              
181 4         23 my $old_position;
182             my $read=sub {
183 13     13   135 my ( $handle, $val, $position ) =@_;
184 13 50       46 (ref $handle) =~ m/ARRAY/ or die "need an array reference not $handle";
185             # ($position) =~ m/^[0-9]+$/i or die "need whole number, not $position";
186 13 100       27 $position=$old_position+1 unless defined $position;
187 13         15 $old_position=$position;
188 13 100       31 return undef if $position > $#$handle;
189 12         18 my $aval=$handle->[$position];
190 12 50       23 print STDERR "comparing ($aval->[0] cmp $val) at position $position of "
191             . $#$handle . "\n"
192             if $verbose & 128;
193 12         41 return ($aval->[0] cmp $val) , $position;
194 4         27 };
195              
196              
197 4         19 my $pos = binary_search ( 0, $#$array, $key, $read, $array, 1 );
198              
199             # we could be pointing off the end..
200 4 100       35 return undef unless defined $pos;
201 3 50       9 return undef if $pos > $#$array;
202 3         4 return @{$array->[$pos]};
  3         27  
203             }
204              
205             sub _clean_filepath ($) {
206 2     2   4 my $path=shift;
207             # Now we clean up the filename. For This we assume unix semantics.
208             # These have been around for long enough that any sensible operating
209             # system could have simply copied them.
210              
211 2         7 $path =~ s,/./,,g;
212              
213             #now chop away down references..
214              
215             # substitute a downchange (dirname/) followed by an upchange ( /../ )
216             # for nothing.
217 2         24 1 while $path =~ s,([^.]|(.[^.])|(..?))/+..($|/),,g ;
218              
219             # clean up multiple slashes
220              
221 2         4 $path =~ s,//,/,g;
222              
223             # delete leading slash
224              
225 2         14 $path =~ s,^/,,g;
226              
227 2 50       20 if ($path =~ m,(^|/)..($|/),) {
228 0         0 croak "upreferences (/../) make ". $path." an unsafe path";
229             }
230              
231             #what are the properties of the filename we can return..
232             #any string which doesn't contain /.. (and refuse /.
233              
234             #now we untaint and do a check..
235              
236 2         12 my ($ret)= $path =~ m,( (?: # directory name; xxx/ or filename; xxx
237             (?: # some filename ....
238             (?:[^./][^/]+) #a filename with no dot
239             |(?:.[^./][^/]+) #a filename starting with .
240             |(?:..[^./][^/]+)#a filename starting with .. why bother?
241             )
242             (?:/|$) # seperator to next directory name or end of filename
243             ) +
244             ),x;
245 2         6 return $ret; #can be undef
246             }
247              
248              
249             sub url_to_file ($) {
250 2     2 1 25 my $url=shift;
251 2 50       6 defined $url or
252             croak "usage url_to_file(); missing url argument";
253 2 50       21 $url =~ m/[a-z][a-z0-9-]+:/i or
254             croak "usage url_to_file(); not url_to_file($url)";
255 2 50       8 @_ and croak "usage url_to_file(); extra argument";
256              
257              
258 2         6 my ($base, $infostruc) = _find_best_match(\@::infostruc_urls_sorted, $url);
259              
260 2 100 66     50 unless (defined $base and ( $url =~ m/^$base/ )) {
261             #taint??
262 1         4 $url=~s/[^A-Za-z\:\&\+\/\.]/\_/g;
263 1 50       7 carp "url_to_file; can't convert '$url' not in an infostructure"
264             unless $no_warn;
265 1         5 return undef;
266             }
267              
268 1 50       4 print STDERR "trying to map $url to editable object\n"
269             if $verbose & 64;
270              
271 1         3 my $file_base=$infostruc->{file_base};
272              
273             #FIXME: we should search all possible infostructures for more general
274             #candidates. We are supposed to handle all file access safely, so
275             #cases where there isn't supposed to be a file associated with a
276             #page and we find a file name which is a script in a dynamic web
277             #page should be okay
278              
279             #then again, maybe the user should choose so this is better.
280              
281 1 50       3 defined $file_base or do {
282 0         0 warn "url $url is in an infostructure without a filebase";
283 0         0 return undef;
284             };
285              
286             #make the url relative to the base.
287              
288 1         13 $url =~ s/^$base//;
289              
290 1         3 my $relative=_clean_filepath($url);
291              
292 1 50       4 return undef unless defined $relative;
293             # print STDERR "base $file_base relative $relative\n";
294              
295 1         3 $file_base =~ s,/$,,;
296             #FIXME: filebase can contain a / so this can end up with //. do we care?
297 1         6 return $file_base . '/' . $relative; #filebase should be an internal variable
298             }
299              
300             =head2 file_to_url
301              
302             C takes a correctly prepared filename as an argument.
303             It then does a binary search on a reverse sorted array of filebases bases.
304             It then uses infostructure definitions to convert the filename to a URL.
305              
306             This function should untaint it's result because it should use the
307             infostructure definitions to ensure that we only do things to files we
308             are supposed to do things to.
309              
310             =cut
311              
312             # Given any url, get us something we can edit in order to change the
313             # resource referenced by that url. Or not, if we can't. In the case
314             # that we can't, return undef.
315              
316              
317             # N.B. This will accept any filename which is within the infostructure
318             # whatsoever.. it is possible that that includes more than you wish to
319             # let people edit.
320              
321             sub file_to_url ($) {
322 2     2 1 17 my $file=shift;
323 2 50       6 defined $file or
324             croak "usage file_to_url(); missing file argument";
325 2 50       5 @_ and croak "usage file_to_url(); extra argument";
326              
327 2         6 my ($base, $infostruc) = _find_best_match(\@::infostruc_files_sorted, $file);
328              
329 2 100 66     35 unless (defined $base and $file =~ m/^$base/) {
330             #taint??
331 1         4 $file=~s/[^A-Za-z\:\&\+\/\.]/\_/g;
332 1 50       4 carp "file_to_url; can't convert '$file' not in an infostructure"
333             unless $no_warn;
334 1         3 return undef;
335             }
336              
337 1 50       5 (my $url_base=$infostruc->{url_base} )
338             or die "badly defined infostruc for file $base";
339              
340              
341 1 50       4 print STDERR "trying to map $file to URL\n"
342             if $verbose & 64;
343              
344             #make the url relative to the base.
345              
346 1         20 $file =~ s/^$base//;
347              
348 1         2 my $relative=_clean_filepath($file);
349              
350 1 50       4 return undef unless defined $relative;
351              
352 1 50       4 $relative =~ m,^/, and die "\$relative should be a relative url not $relative";
353 1         3 $url_base =~ s,/$,,;
354 1         6 return $url_base . '/' . $relative; #filebase should be an internal variable
355             }
356              
357              
358              
359             1;