| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::SiteTear::PageFilter; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
39
|
|
|
4
|
1
|
|
|
1
|
|
7
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
32
|
|
|
5
|
1
|
|
|
1
|
|
6
|
use File::Basename; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
81
|
|
|
6
|
1
|
|
|
1
|
|
1019
|
use Encode; |
|
|
1
|
|
|
|
|
14154
|
|
|
|
1
|
|
|
|
|
102
|
|
|
7
|
1
|
|
|
1
|
|
801
|
use Encode::Guess; |
|
|
1
|
|
|
|
|
5278
|
|
|
|
1
|
|
|
|
|
572
|
|
|
8
|
1
|
|
|
1
|
|
81
|
use URI; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
25
|
|
|
9
|
|
|
|
|
|
|
#use Data::dumper; |
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
1033
|
use HTML::Parser 3.40; |
|
|
1
|
|
|
|
|
12469
|
|
|
|
1
|
|
|
|
|
59
|
|
|
12
|
1
|
|
|
1
|
|
1258
|
use HTML::HeadParser; |
|
|
1
|
|
|
|
|
1276
|
|
|
|
1
|
|
|
|
|
31
|
|
|
13
|
1
|
|
|
1
|
|
8
|
use base qw(HTML::Parser Class::Accessor); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
153
|
|
|
14
|
|
|
|
|
|
|
__PACKAGE__->mk_accessors(qw(has_remote_base |
|
15
|
|
|
|
|
|
|
page)); |
|
16
|
|
|
|
|
|
|
|
|
17
|
1
|
|
|
1
|
|
1067
|
use HTML::Copy; |
|
|
1
|
|
|
|
|
6100
|
|
|
|
1
|
|
|
|
|
11
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = '1.43'; |
|
20
|
|
|
|
|
|
|
our @htmlSuffix = qw(.html .htm .xhtml); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 NAME |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
HTML::SiteTear::PageFilter - change link pathes in HTML files. |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 SYMPOSIS |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
use HTML::SiteTear::PageFilter; |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# $page must be an instance of L. |
|
31
|
|
|
|
|
|
|
$filter = HTML::SiteTear::PageFilter->new($page); |
|
32
|
|
|
|
|
|
|
$fileter->parse_file(); |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
This module is to change link pathes in HTML files. It's a sub class of L. Internal use only. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 METHODS |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 new |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
$filter = HTML::SiteTear::PageFilter->new($page); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Make an instance of this moduel. $parent must be an instance of HTML::SiteTear::Root or HTML::SiteTear::Page. This method is called from $parent. |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub new { |
|
49
|
0
|
|
|
0
|
1
|
|
my ($class, $page) = @_; |
|
50
|
0
|
|
|
|
|
|
my $parent = $class->SUPER::new(); |
|
51
|
0
|
|
|
|
|
|
my $self = bless $parent, $class; |
|
52
|
0
|
|
|
|
|
|
$self->page($page); |
|
53
|
0
|
|
|
|
|
|
$self->{'allow_abs_link'} = $page->source_root->allow_abs_link; |
|
54
|
0
|
|
|
|
|
|
$self->{'use_abs_link'} = 0; |
|
55
|
0
|
|
|
|
|
|
$self->has_remote_base(0); |
|
56
|
0
|
|
|
|
|
|
return $self; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 parse_file |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
$filter->parse_file; |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Parse the HTML file given by $page and change link pathes. The output data are retuned thru the method "write_data". |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=cut |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
sub parse_file { |
|
68
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
|
69
|
0
|
|
|
|
|
|
my $p = HTML::Copy->new($self->page->source_path); |
|
70
|
0
|
|
|
|
|
|
$self->page->set_binmode($p->io_layer); |
|
71
|
0
|
|
|
|
|
|
$self->SUPER::parse($p->source_html); |
|
72
|
|
|
|
|
|
|
} |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head1 SEE ALOSO |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
L, L, L, L |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 AUTHOR |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Tetsuro KURITA |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
##== private methods |
|
85
|
|
|
|
|
|
|
sub output { |
|
86
|
0
|
|
|
0
|
0
|
|
my ($self, $data) = @_; |
|
87
|
0
|
|
|
|
|
|
$self->page->write_data($data); |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
##== overriding methods of HTML::Parser |
|
91
|
|
|
|
|
|
|
|
|
92
|
0
|
|
|
0
|
1
|
|
sub declaration { $_[0]->output("") } |
|
93
|
0
|
|
|
0
|
1
|
|
sub process { $_[0]->output($_[2]) } |
|
94
|
0
|
|
|
0
|
1
|
|
sub end { $_[0]->output($_[2]) } |
|
95
|
0
|
|
|
0
|
1
|
|
sub text { $_[0]->output($_[1]) } |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub comment { |
|
98
|
0
|
|
|
0
|
1
|
|
my ($self, $comment) = @_; |
|
99
|
|
|
|
|
|
|
|
|
100
|
0
|
0
|
|
|
|
|
if ($self->{'allow_abs_link'}) { |
|
101
|
0
|
0
|
|
|
|
|
if ($comment =~ /^\s*begin abs_link/) { |
|
|
|
0
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
$self->{'use_abs_link'} = 1; |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
} elsif($comment =~ /^\s*end abs_link/) { |
|
105
|
0
|
|
|
|
|
|
$self->{'use_abs_link'} = 0; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
0
|
|
|
|
|
|
$self->output(""); |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub start { |
|
113
|
0
|
|
|
0
|
1
|
|
my ($self, $tag, $attr_dict, $attr_names, $tag_text) = @_; |
|
114
|
0
|
|
|
|
|
|
my $page = $self->page; |
|
115
|
0
|
0
|
|
|
|
|
my $empty_tag_end = ($tag =~ /\/>$/) ? ' />' : '>'; |
|
116
|
|
|
|
|
|
|
|
|
117
|
0
|
0
|
|
|
|
|
if ($self->has_remote_base) { |
|
118
|
0
|
|
|
|
|
|
return $self->output($tag_text); |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
my $process_link = sub { |
|
122
|
0
|
|
|
0
|
|
|
my ($target_attr, $folder_name, $kind) = @_; |
|
123
|
0
|
0
|
|
|
|
|
if (my $link = $attr_dict->{$target_attr}) { |
|
124
|
0
|
0
|
|
|
|
|
if ($self->{'use_abs_link'}) { |
|
125
|
0
|
|
|
|
|
|
$attr_dict->{$target_attr} = $page->build_abs_url($link); |
|
126
|
|
|
|
|
|
|
} else { |
|
127
|
0
|
0
|
|
|
|
|
unless ($kind) {$kind = $folder_name}; |
|
|
0
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
|
$attr_dict->{$target_attr} |
|
129
|
|
|
|
|
|
|
= $page->change_path($link, $folder_name, $kind); |
|
130
|
|
|
|
|
|
|
} |
|
131
|
0
|
|
|
|
|
|
return HTML::Copy->build_attributes($attr_dict, $attr_names); |
|
132
|
|
|
|
|
|
|
} |
|
133
|
0
|
|
|
|
|
|
return (); |
|
134
|
0
|
|
|
|
|
|
}; |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
#treat image files |
|
137
|
0
|
0
|
|
|
|
|
if ($tag eq 'base') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
138
|
0
|
|
|
|
|
|
my $uri = URI->new($attr_dict->{'href'}); |
|
139
|
0
|
0
|
0
|
|
|
|
if (!($uri->scheme) or ($uri->scheme eq 'file')) { |
|
140
|
0
|
|
|
|
|
|
$page->base_uri($uri->abs($page->base_uri)); |
|
141
|
0
|
|
|
|
|
|
$tag_text = ''; |
|
142
|
|
|
|
|
|
|
} else { |
|
143
|
0
|
|
|
|
|
|
$self->has_remote_base(1); |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
} elsif ($tag eq 'img') { |
|
147
|
0
|
0
|
|
|
|
|
if (my $tag_attrs = &$process_link('src', $page->resource_folder_name)) { |
|
148
|
0
|
|
|
|
|
|
$tag_text = "<$tag $tag_attrs".$empty_tag_end; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
} elsif ($tag eq 'body') { #background images |
|
152
|
0
|
0
|
|
|
|
|
if (my $tag_attrs = &$process_link('background', $page->resource_folder_name)) { |
|
153
|
0
|
|
|
|
|
|
$tag_text = "<$tag $tag_attrs>"; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
#linked stylesheet |
|
157
|
|
|
|
|
|
|
elsif ($tag eq 'link') { |
|
158
|
0
|
|
|
|
|
|
my $folder_name = $page->resource_folder_name; |
|
159
|
0
|
|
|
|
|
|
my $kind = $folder_name; |
|
160
|
0
|
|
|
|
|
|
my $relation; |
|
161
|
0
|
0
|
|
|
|
|
if (defined( $relation = ($attr_dict ->{'rel'}) )){ |
|
162
|
0
|
|
|
|
|
|
$relation = lc $relation; |
|
163
|
0
|
0
|
|
|
|
|
if ($relation eq 'stylesheet') { |
|
164
|
0
|
|
|
|
|
|
$kind = 'css'; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
|
|
168
|
0
|
0
|
|
|
|
|
if (my $tag_attrs = &$process_link('href', $folder_name, $kind)) { |
|
169
|
0
|
|
|
|
|
|
$tag_text = "<$tag $tag_attrs".$empty_tag_end; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
#frame |
|
173
|
|
|
|
|
|
|
elsif ($tag eq 'frame') { |
|
174
|
0
|
0
|
|
|
|
|
if (my $tag_attrs = &$process_link('src', $page->page_folder_name, 'page')) { |
|
175
|
0
|
|
|
|
|
|
$tag_text = "<$tag $tag_attrs".$empty_tag_end; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
#javascript |
|
179
|
|
|
|
|
|
|
elsif ($tag eq 'script') { |
|
180
|
0
|
0
|
|
|
|
|
if (my $tag_attrs = &$process_link('src', $page->resource_folder_name)) { |
|
181
|
0
|
|
|
|
|
|
$tag_text = "<$tag $tag_attrs>"; |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
#link |
|
185
|
|
|
|
|
|
|
elsif ($tag eq 'a') { |
|
186
|
0
|
0
|
|
|
|
|
if ( exists($attr_dict->{'href'}) ) { |
|
187
|
0
|
|
|
|
|
|
my $href = $attr_dict->{'href'}; |
|
188
|
0
|
|
|
|
|
|
my $kind = 'page'; |
|
189
|
0
|
|
|
|
|
|
my $folder_name = $page->page_folder_name; |
|
190
|
0
|
0
|
|
|
|
|
if ($href !~/(.+)#(.*)/) { |
|
191
|
0
|
|
|
|
|
|
my @matchedSuffix = grep {$href =~ /\Q$_\E$/} @htmlSuffix; |
|
|
0
|
|
|
|
|
|
|
|
192
|
0
|
0
|
|
|
|
|
unless (@matchedSuffix) { |
|
193
|
0
|
|
|
|
|
|
$folder_name = $page->resource_folder_name; |
|
194
|
0
|
|
|
|
|
|
$kind = $folder_name; |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
} |
|
197
|
0
|
0
|
|
|
|
|
if (my $tag_attrs = &$process_link('href', $folder_name, $kind)) { |
|
198
|
0
|
|
|
|
|
|
$tag_text = "<$tag $tag_attrs>"; |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
0
|
|
|
|
|
|
$self->output($tag_text); |
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
1; |