|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package WWW::Link::Repair;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $REVISION=q$Revision: 1.11 $ ; $VERSION = sprintf ( "%d.%02d", $REVISION =~ /(\d+).(\d+)/ );  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Repair.pm - repair links in files.  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Repair::Substitutor;  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Repair;  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $linksubs1 = WWW::Link::Repair::Substitutor::gen_substitutor(  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        "http://bounce.com/" ,  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        "http://bing.bong/",  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        0, 0,  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       );  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $filehand = WWW::Link::Repair::Substitutor::gen_simple_file_handler ($linksubs);  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &$filehand("fix-this-file");  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use CDB_File::BiIndex;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $::index = new CDB_File::BiIndex "page-index-file", "link-index-file";  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     WWW::Link::Repair::infostructure($index, $filehand, "http://bounce.com/");  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module provides functions that allow the repair of files.  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our ($fakeit);  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our ($verbose);  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our ($no_warn);  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $fakeit = 0 unless defined $fakeit;  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $verbose = 0 unless defined $verbose;  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $no_warn = 0 unless defined $no_warn;  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
3184
 | 
 use File::Copy;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11127
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
166
 | 
    | 
| 
43
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
17
 | 
 use Carp;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
174
 | 
    | 
| 
44
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
10
 | 
 use strict;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
317
 | 
    | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 directory(file handler, directory ... )  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This function recurses through each given directory argument.  For  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 each file found it calls the file handler function.  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The B should be a function which can be called on a  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 filename and will update that file with the new URL.  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B is the base URL which should be iterated from.  It must  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 exist within the B.  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B controlls whether to attempt to replace all links  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 below that link.  In this case the index is iterated beyond the first  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 link for all links which begin with the first link.  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub directory {  | 
| 
64
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
   my $handler=shift;  | 
| 
65
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   our ($fixed);  | 
| 
66
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   local $fixed;  | 
| 
67
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   $fixed=0;  | 
| 
68
 | 
0
 | 
  
  0
  
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
   my $sub = sub {-d && return 0; $fixed += &$handler($File::Find::name)};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
69
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   File::Find::find($sub, @_);  | 
| 
70
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   return $fixed;  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 infostructure(index object, file handler, oldurl, as_directory)  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This function will use a previously build index to update all of the  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 files referenced from that index that need updating.  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The B object will be treated as a BiIndex.  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The B should be a function which can be called on a  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 filename and will update that file with the new URL.  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B is the base URL which should be iterated from.  It must  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 exist within the B.  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B controlls whether to attempt to replace all links  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 below that link.  In this case the index is iterated beyond the first  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 link for all links which begin with the first link.  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
9
 | 
 use vars qw($infostrucbase $filebase);  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1796
 | 
    | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _link_fix($$$) {  | 
| 
95
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
9
 | 
   my $url_to_file=shift;  | 
| 
96
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   my $file_handler=shift;  | 
| 
97
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my $editlist=shift;  | 
| 
98
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   my $fixed=0;  | 
| 
99
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
  PAGE: foreach my $member (@$editlist) {  | 
| 
100
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     print STDERR "going to convert $member to file\n"  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if $WWW::Link::Repair::verbose & 32;  | 
| 
102
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     my $file = &$url_to_file($member);  | 
| 
103
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
113
 | 
     defined $file or do {  | 
| 
104
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       print STDERR "No filename for $member.  Skipping\n";  | 
| 
105
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       next PAGE;  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
107
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     print STDERR "file is $file\n" if $WWW::Link::Repair::verbose & 32;  | 
| 
108
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     $fixed+=&$file_handler($file);  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
110
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   return $fixed;  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub infostructure ($$$$;$) {  | 
| 
115
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
60192
 | 
   my ($oldurl, $index, $url_to_file, $file_handler, $recursive, $junk)=@_;  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
49
 | 
   defined $file_handler or   | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak "missing argument to infostructure(\$\$\$\$;\$)";  | 
| 
119
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
116
 | 
   $oldurl =~ m/^[a-z][a-z0-9-]*:/ or  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak "first argument to infostructure() must be a url not $oldurl";  | 
| 
121
 | 
3
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
141
 | 
   ref $index and $index->can("lookup_second") or  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak "second argument to infostructure() must be a biindex not $index";  | 
| 
123
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
   (ref $url_to_file) =~ m/CODE/ or  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak "third argument to infostructure() must be a CODE ref not $url_to_file";  | 
| 
125
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
   (ref $file_handler) =~ m/CODE/ or  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak "fourth argument to infostructure() must be a CODE ref not $url_to_file";  | 
| 
127
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
   defined $junk and croak "extra argument to infostructure(\$\$\$\$;\$)";  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
129
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
   my $key=$index->second_set_iterate($oldurl);  | 
| 
130
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
143
 | 
   my $fixed=0;  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
   if (defined $key) {  | 
| 
133
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     if ($recursive) {		#we should substitute all links below this  | 
| 
134
 | 
2
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
136
 | 
       while (defined $key and $key =~ m/^$oldurl/) {  | 
| 
135
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 	my $editlist=$index->lookup_second($key);  | 
| 
136
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
82
 | 
 	$fixed += _link_fix($url_to_file,$file_handler,$editlist);  | 
| 
137
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	$key = $index->second_next();  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {			#just warn if there are any links below this.  | 
| 
140
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
       my $next;  | 
| 
141
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
60
 | 
       if ( $key =~ m/^$oldurl/ and not $key eq $oldurl ) {  | 
| 
142
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	warn "There were no files with exactly that link to edit.\n";  | 
| 
143
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	last;  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
145
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	my $editlist=$index->lookup_second($oldurl);  | 
| 
146
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	$fixed += _link_fix($url_to_file,$file_handler,$editlist);  | 
| 
147
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	$key=$index->second_next();  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
149
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
52
 | 
       warn "Ignoring URLs starting with your URL such as $key.\n"  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if defined $key and $key =~ m/^$oldurl/;  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
153
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     warn "There were no files with exactly that link to edit.\n";  | 
| 
154
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     print STDERR "key was beyond all keys in index\n"  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if $WWW::Link::Repair::verbose & 16;  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
158
 | 
3
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
36
 | 
   $fixed or carp "didn't make any substitutions for $oldurl" unless $no_warn;  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #FIXME repair the infostructure index..  | 
| 
160
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   return $fixed;  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  =head2 map_url_to_editable  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  Given any url, get us something we can edit in order to change the  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  resource referenced by that url.  Or not, if we can't.  In the case  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  that we can't, return undef.  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  The aim of this function is to return something which is not tainted.  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  N.B.  This will accept any filename which is within the infostructure  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  whatsoever.. it is possible that that includes more than you wish to  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  let people edit.  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  For this function to work the two variables:  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $WWW::Link::Repair::filebase  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $WWW::Link::Repair::infostrucbase  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  must be defined appropriately  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  =cut  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  # sub{}  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  # @conversions = [  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  #   { regexp => 'http:://stuff..../'  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  #     changeurlfunc => sub {  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  #     }  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  # ]  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  sub map_url_to_editable ($) {  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    my $save=$_;  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $_=shift;  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    print STDERR "trying to map $_ to editable object\n"  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      if $WWW::Link::Repair::verbose & 64;  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    unless (m/^$infostrucbase/) {  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      my $print=$_;  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      $_=$save;  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      croak "can't deal with url '$print' not in our infostructure"; #taint??  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    }  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    die 'config variable $WWW::Link::Repair::infostrucbase must be defined'  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      unless defined $infostrucbase;  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    s/^$infostrucbase//;  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    # Now we clean up the filename.  For This we assume unix semantics.  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    # These have been around for long enough that any sensible operating  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    # system could have simply copied them.  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    s,/./,,g;  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    #now chop away down references..  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    # substitute a downchange (dirname/) followed by an upchange ( /../ )  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    # for nothing.  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    1 while s,([^.]|(.[^.])|(..?))/+..($|/),,g ;  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    # clean up multiple slashes  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    s,//,/,g;  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    # delete leading slash  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    s,^/,,g;  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    if (m,(^|/)..($|/),) {  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      $_=$save;  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      croak "upreferences (/../) put that outside our infostructure";  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    }  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    #what are the properties of the filename we can return..  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    #any string which doesn't contain /.. (and refuse /.  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    #now we untaint and do a check..  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $_ =~ m,( (?:             # directory name; xxx/ or filename; xxx  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	         (?:                # some filename ....  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	           (?:[^./][^/]+)              #a filename with no dot  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	          |(?:.[^./][^/]+)             #a filename starting with .  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	          |(?:..[^./][^/]+)            #a filename starting with .. why bother?  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	         )  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	         (?:/|$)           # seperator to next directory name or end of filename  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	      ) +  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  	    ),x; #we set $1 to the whole qualified filename.  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    my $fixable = $1;  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    $_=$save;  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    return undef unless defined $fixable;  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    die 'config variable $WWW::Link::Repair::filebase must be defined'  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #      unless defined $filebase;  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    #FIXME: filebase can contain a / so this can end up with //. do we care?  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    return $filebase . '/' . $fixable; #filebase should be an internal variable  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  }  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 check_url_is_full  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The aim of this function is to check whether a given url is full  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub check_url ($) {  | 
| 
268
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
   my $fixable=shift;  | 
| 
269
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  FIXABLE: foreach (@$fixable) {  | 
| 
270
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     m,^[A-Za-z]+://^[A-Za-z]+/, or die "unqualified URL in database $_";  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1; #why are we all afraid of require?  Why do we give in??  |