| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::MobileConverter; |
|
2
|
1
|
|
|
1
|
|
31314
|
use strict; |
|
|
1
|
|
|
|
|
7
|
|
|
|
1
|
|
|
|
|
40
|
|
|
3
|
1
|
|
|
1
|
|
24871
|
use HTML::Parser; |
|
|
1
|
|
|
|
|
6772
|
|
|
|
1
|
|
|
|
|
37
|
|
|
4
|
1
|
|
|
1
|
|
23362
|
use URI; |
|
|
1
|
|
|
|
|
9870
|
|
|
|
1
|
|
|
|
|
1737
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.011'; |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
sub new { |
|
9
|
1
|
|
|
1
|
1
|
19
|
my $class = shift; |
|
10
|
1
|
|
|
|
|
5
|
my $self = {@_}; |
|
11
|
1
|
|
|
|
|
3
|
bless $self, $class; |
|
12
|
1
|
|
|
|
|
7
|
$self->init; |
|
13
|
1
|
|
|
|
|
3
|
return $self; |
|
14
|
|
|
|
|
|
|
} |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub init { |
|
17
|
1
|
|
|
1
|
0
|
2
|
my $self = shift; |
|
18
|
1
|
|
50
|
|
|
12
|
$self->{maxlength} ||= 50000; |
|
19
|
1
|
|
50
|
|
|
7
|
$self->{maxpctagcount} ||= 10; |
|
20
|
1
|
|
50
|
|
|
7
|
$self->{maxpctagrate} ||= 0.1; |
|
21
|
1
|
50
|
|
|
|
4
|
$self->{baseuri} or warn 'baseuri is not specified.'; |
|
22
|
|
|
|
|
|
|
$self->{hrefhandler} ||= sub { |
|
23
|
0
|
|
|
0
|
|
0
|
my $href = shift; |
|
24
|
0
|
|
|
|
|
0
|
return URI->new_abs($href, $self->{baseuri}); |
|
25
|
1
|
|
50
|
|
|
4
|
}; |
|
26
|
1
|
|
50
|
|
|
43
|
$self->{mobiletag} ||= { |
|
27
|
|
|
|
|
|
|
a => 'name|accesskey|href|cti|ijam|utn|subject|body|telbook|kana|email|ista|ilet|iswf|irst', |
|
28
|
|
|
|
|
|
|
base => 'href', |
|
29
|
|
|
|
|
|
|
blink => '', |
|
30
|
|
|
|
|
|
|
blockquote => '', |
|
31
|
|
|
|
|
|
|
body => 'bgcolor|text|link|alink|vlink', |
|
32
|
|
|
|
|
|
|
br => 'clear', |
|
33
|
|
|
|
|
|
|
center => '', |
|
34
|
|
|
|
|
|
|
dd => '', |
|
35
|
|
|
|
|
|
|
dir => 'type', |
|
36
|
|
|
|
|
|
|
div => 'align', |
|
37
|
|
|
|
|
|
|
dl => '', |
|
38
|
|
|
|
|
|
|
dt => '', |
|
39
|
|
|
|
|
|
|
font => 'color|size', |
|
40
|
|
|
|
|
|
|
form => 'action|method|utn', |
|
41
|
|
|
|
|
|
|
head => '', |
|
42
|
|
|
|
|
|
|
h1 => 'align', |
|
43
|
|
|
|
|
|
|
h2 => 'align', |
|
44
|
|
|
|
|
|
|
h3 => 'align', |
|
45
|
|
|
|
|
|
|
h4 => 'align', |
|
46
|
|
|
|
|
|
|
h5 => 'align', |
|
47
|
|
|
|
|
|
|
h6 => 'align', |
|
48
|
|
|
|
|
|
|
hr => 'align|size|width|noshade|color', |
|
49
|
|
|
|
|
|
|
html => '', |
|
50
|
|
|
|
|
|
|
img => 'src|align|width|height|hspace|vspace|alt', |
|
51
|
|
|
|
|
|
|
input => 'type|name|size|maxlength|accesskey|value|istyle|checked', |
|
52
|
|
|
|
|
|
|
li => 'type|value', |
|
53
|
|
|
|
|
|
|
marquee => 'behavior|direction|loop|height|width|scrollmount|scrolldelay|bgcolor', |
|
54
|
|
|
|
|
|
|
menu => 'type', |
|
55
|
|
|
|
|
|
|
meta => 'http\-equiv|content', |
|
56
|
|
|
|
|
|
|
object => 'declare|id|data|type|width|height', |
|
57
|
|
|
|
|
|
|
ol => 'type|start', |
|
58
|
|
|
|
|
|
|
option => 'selected|value', |
|
59
|
|
|
|
|
|
|
p => 'align', |
|
60
|
|
|
|
|
|
|
param => 'name|value|valuetype', |
|
61
|
|
|
|
|
|
|
plaintext => '', |
|
62
|
|
|
|
|
|
|
pre => '', |
|
63
|
|
|
|
|
|
|
select => 'name|size|multiple', |
|
64
|
|
|
|
|
|
|
textarea => 'name|accesskey|rows|cols|istyle', |
|
65
|
|
|
|
|
|
|
title => '', |
|
66
|
|
|
|
|
|
|
ul => 'type', |
|
67
|
|
|
|
|
|
|
}; |
|
68
|
1
|
|
|
|
|
3
|
$self->{codetag} = 'script|style'; |
|
69
|
1
|
|
|
|
|
3
|
$self->{ignoretag} = 'form|input|select|option|textarea'; |
|
70
|
1
|
|
|
|
|
2
|
$self->{html} = ''; |
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub _initparser { |
|
74
|
5
|
|
|
5
|
|
1067
|
my $self = shift; |
|
75
|
5
|
|
|
|
|
14
|
$self->{html2} = ''; |
|
76
|
5
|
|
|
|
|
12
|
$self->{mhtml} = ''; |
|
77
|
5
|
|
|
|
|
11
|
$self->{tagcount} = 0; |
|
78
|
5
|
|
|
|
|
9
|
$self->{mtagcount} = 0; |
|
79
|
5
|
|
|
|
|
13
|
$self->{ismobilecontent} = ''; |
|
80
|
5
|
|
|
|
|
76
|
$self->{iscode} = 0; |
|
81
|
5
|
|
|
|
|
155
|
$self->{parser} = HTML::Parser->new( |
|
82
|
|
|
|
|
|
|
api_version => 3, |
|
83
|
|
|
|
|
|
|
handlers => { |
|
84
|
|
|
|
|
|
|
start => [$self->starthandler, 'text, tagname, attr'], |
|
85
|
|
|
|
|
|
|
end => [$self->endhandler, 'text, tagname'], |
|
86
|
|
|
|
|
|
|
text => [$self->texthandler, 'text'], |
|
87
|
|
|
|
|
|
|
default => [$self->defaulthandler, 'event, text'], |
|
88
|
|
|
|
|
|
|
}, |
|
89
|
|
|
|
|
|
|
); |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub starthandler { |
|
93
|
6
|
|
|
6
|
0
|
113
|
my $self = shift; |
|
94
|
|
|
|
|
|
|
return sub { |
|
95
|
9
|
|
|
9
|
|
18
|
my ($text, $tag, $attr) = @_; |
|
96
|
9
|
|
|
|
|
14
|
$self->{tagcount}++; |
|
97
|
9
|
100
|
|
|
|
405
|
if ($tag =~ /^($self->{codetag})$/i) { |
|
98
|
1
|
|
|
|
|
3
|
$self->{iscode}++; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
9
|
100
|
|
|
|
37
|
if (defined $self->{mobiletag}->{$tag}) { |
|
101
|
7
|
|
|
|
|
11
|
$self->{mtagcount}++; |
|
102
|
7
|
50
|
|
|
|
94
|
if ($tag =~ /^($self->{ignoretag})$/i) { |
|
103
|
0
|
|
|
|
|
0
|
return; |
|
104
|
|
|
|
|
|
|
} else { |
|
105
|
7
|
|
|
|
|
24
|
$self->{mhtml} .= $self->_makestartm($tag,$attr); |
|
106
|
7
|
|
|
|
|
22
|
$self->{html2} .= $self->_makestart2($tag,$attr); |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
} |
|
109
|
6
|
|
|
|
|
53
|
}; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub _makestartm { |
|
113
|
8
|
|
|
8
|
|
673
|
my $self = shift; |
|
114
|
8
|
50
|
|
|
|
90
|
my $tag = shift or return; |
|
115
|
8
|
50
|
|
|
|
22
|
my $attr = shift or return; |
|
116
|
8
|
100
|
|
|
|
35
|
if ($tag eq 'img') { |
|
117
|
1
|
|
|
|
|
4
|
my ($w,$h) = @$attr{'width', 'height'}; |
|
118
|
1
|
0
|
33
|
|
|
10
|
unless ($w && $h && $w < 100 && $h < 100) { |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
119
|
1
|
|
|
|
|
2
|
my $text = 'img:'; |
|
120
|
1
|
|
50
|
|
|
5
|
$text .= $attr->{alt} || $attr->{title} || ''; |
|
121
|
1
|
|
|
|
|
5
|
return $text; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
} |
|
124
|
7
|
|
|
|
|
15
|
my $attrpat = $self->{mobiletag}->{$tag}; |
|
125
|
7
|
|
|
|
|
13
|
my $text = qq|<$tag|; |
|
126
|
7
|
|
|
|
|
186
|
for my $key (keys %$attr) { |
|
127
|
4
|
100
|
|
|
|
467
|
if ($key =~ /^(href|action)$/) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
128
|
3
|
|
|
|
|
18
|
$text .= qq| $key="| . |
|
129
|
|
|
|
|
|
|
$self->{hrefhandler}($attr->{$key}) . '"'; |
|
130
|
|
|
|
|
|
|
} elsif ($key eq 'src') { |
|
131
|
0
|
|
|
|
|
0
|
$text .= qq| $key="|. URI->new_abs($attr->{$key}, $self->{baseuri}) . '"'; |
|
132
|
|
|
|
|
|
|
} elsif ($key =~ /^($attrpat)$/i) { |
|
133
|
0
|
|
|
|
|
0
|
$text .= qq| $key="$attr->{$key}"|; |
|
134
|
|
|
|
|
|
|
} |
|
135
|
|
|
|
|
|
|
} |
|
136
|
7
|
|
|
|
|
762
|
$text .= '>'; |
|
137
|
7
|
|
|
|
|
16
|
return $text; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub _makestart2 { |
|
141
|
8
|
|
|
8
|
|
384
|
my $self = shift; |
|
142
|
8
|
50
|
|
|
|
38
|
my $tag = shift or return; |
|
143
|
8
|
50
|
|
|
|
20
|
my $attr = shift or return; |
|
144
|
8
|
|
|
|
|
14
|
my $text = qq|<$tag|; |
|
145
|
8
|
|
|
|
|
21
|
for my $key (keys %$attr) { |
|
146
|
7
|
100
|
|
|
|
25521
|
if ($key =~ /^(href|action)$/) { |
|
|
|
100
|
|
|
|
|
|
|
147
|
3
|
|
|
|
|
477
|
$text .= qq| $key="| . |
|
148
|
|
|
|
|
|
|
$self->{hrefhandler}($attr->{$key}) . '"'; |
|
149
|
|
|
|
|
|
|
} elsif ($key eq 'src') { |
|
150
|
1
|
|
|
|
|
51
|
$text .= qq| $key="|. URI->new_abs($attr->{$key}, $self->{baseuri}) . '"'; |
|
151
|
|
|
|
|
|
|
} else { |
|
152
|
3
|
|
|
|
|
18
|
$text .= qq| $key="$attr->{$key}"|; |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
} |
|
155
|
8
|
|
|
|
|
507
|
$text .= '>'; |
|
156
|
8
|
|
|
|
|
59
|
return $text; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub endhandler { |
|
160
|
6
|
|
|
6
|
0
|
11
|
my $self = shift; |
|
161
|
|
|
|
|
|
|
return sub { |
|
162
|
9
|
|
|
9
|
|
14
|
my ($text, $tag) = @_; |
|
163
|
9
|
100
|
|
|
|
94
|
if ($tag =~ /^($self->{codetag})$/i) { |
|
164
|
1
|
|
|
|
|
2
|
$self->{iscode}--; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
9
|
100
|
|
|
|
36
|
if (defined $self->{mobiletag}->{$tag}) { |
|
167
|
7
|
50
|
|
|
|
108
|
if ($tag =~ /^($self->{ignoretag})$/i) { |
|
168
|
0
|
|
|
|
|
0
|
$text = ''; |
|
169
|
|
|
|
|
|
|
} else { |
|
170
|
7
|
|
|
|
|
12
|
$self->{mhtml} .= $text; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
7
|
|
|
|
|
32
|
$self->{html2} .= $text; |
|
173
|
|
|
|
|
|
|
} |
|
174
|
6
|
|
|
|
|
105
|
}; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub texthandler { |
|
178
|
6
|
|
|
6
|
0
|
10
|
my $self = shift; |
|
179
|
|
|
|
|
|
|
return sub { |
|
180
|
11
|
|
|
11
|
|
20
|
my ($text) = @_; |
|
181
|
11
|
100
|
|
|
|
29
|
if (!$self->{iscode}) { |
|
182
|
10
|
|
|
|
|
16
|
$self->{mhtml} .= $text; |
|
183
|
|
|
|
|
|
|
} |
|
184
|
11
|
|
|
|
|
54
|
$self->{html2} .= $text; |
|
185
|
6
|
|
|
|
|
40
|
}; |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub defaulthandler { |
|
189
|
5
|
|
|
5
|
0
|
11
|
my $self = shift; |
|
190
|
5
|
|
|
3
|
|
58
|
return sub {}; |
|
|
3
|
|
|
|
|
115
|
|
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub convert { |
|
194
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
195
|
2
|
50
|
|
|
|
11
|
$self->{html} = shift or return; |
|
196
|
2
|
|
|
|
|
6
|
$self->_initparser; |
|
197
|
2
|
|
|
|
|
151
|
$self->{parser}->parse($self->{html}); |
|
198
|
2
|
100
|
|
|
|
7
|
if ($self->_checkmobile) { |
|
199
|
1
|
|
|
|
|
5
|
return $self->{html2}; |
|
200
|
|
|
|
|
|
|
} else { |
|
201
|
1
|
|
|
|
|
4
|
return $self->{mhtml}; |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
} |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub _checkmobile { |
|
206
|
3
|
|
|
3
|
|
6
|
my $self = shift; |
|
207
|
3
|
50
|
33
|
|
|
115
|
if ($self->{maxlength} && (length($self->{html}) > $self->{maxlength})) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
0
|
return; # over max size |
|
209
|
|
|
|
|
|
|
} elsif (($self->{tagcount} - $self->{mtagcount}) > $self->{maxpctagcount}) { |
|
210
|
0
|
|
|
|
|
0
|
return; # includes many pc tags |
|
211
|
|
|
|
|
|
|
} elsif (!$self->{tagcount}) { |
|
212
|
|
|
|
|
|
|
# do nothing |
|
213
|
|
|
|
|
|
|
} elsif (($self->{mtagcount} / $self->{tagcount}) < (1 - $self->{maxpctagrate})) { |
|
214
|
2
|
|
|
|
|
8
|
return; # includes many pc tags |
|
215
|
|
|
|
|
|
|
} |
|
216
|
1
|
|
|
|
|
5
|
$self->{ismobilecontent} = 1; |
|
217
|
1
|
|
|
|
|
4
|
return $self->ismobilecontent; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
sub ismobilecontent { |
|
221
|
3
|
|
|
3
|
1
|
6
|
my $self = shift; |
|
222
|
3
|
|
|
|
|
14
|
return $self->{ismobilecontent}; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
|
|
225
|
15
|
|
|
15
|
0
|
538
|
sub param { $_[0]->{$_[1]}; } |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
1; |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
__END__ |