| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Perlanet::Trait::Scrubber; |
|
2
|
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
2266
|
use strict; |
|
|
6
|
|
|
|
|
9
|
|
|
|
6
|
|
|
|
|
156
|
|
|
4
|
6
|
|
|
6
|
|
23
|
use warnings; |
|
|
6
|
|
|
|
|
7
|
|
|
|
6
|
|
|
|
|
116
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
6
|
|
|
6
|
|
44
|
use 5.6.0; |
|
|
6
|
|
|
|
|
15
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.58'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
6
|
|
|
6
|
|
20
|
use Moose::Role; |
|
|
6
|
|
|
|
|
7
|
|
|
|
6
|
|
|
|
|
29
|
|
|
10
|
6
|
|
|
6
|
|
18715
|
use namespace::autoclean; |
|
|
6
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
34
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
6
|
|
|
6
|
|
2947
|
use HTML::Scrubber; |
|
|
6
|
|
|
|
|
10183
|
|
|
|
6
|
|
|
|
|
1429
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Perlanet::Trait::Scrubber - clean posts with HTML::Scrubber before aggregating |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Before adding a post to the aggregated feed, it will first be cleaned with |
|
21
|
|
|
|
|
|
|
L<HTML::Scrubber>. |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 scrubber |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
An instance of L<HTML::Scrubber> used to remove unwanted content from |
|
28
|
|
|
|
|
|
|
the feed entries. For default settings see source of Perlanet.pm. |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=cut |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
has 'scrubber' => ( |
|
33
|
|
|
|
|
|
|
is => 'rw', |
|
34
|
|
|
|
|
|
|
lazy_build => 1 |
|
35
|
|
|
|
|
|
|
); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub _build_scrubber { |
|
38
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
39
|
|
|
|
|
|
|
|
|
40
|
0
|
|
|
|
|
|
my %scrub_rules = ( |
|
41
|
|
|
|
|
|
|
img => { |
|
42
|
|
|
|
|
|
|
src => qr{^https?://}, # only URL with http:// |
|
43
|
|
|
|
|
|
|
alt => 1, # alt attributes allowed |
|
44
|
|
|
|
|
|
|
align => 1, # allow align on images |
|
45
|
|
|
|
|
|
|
style => 1, |
|
46
|
|
|
|
|
|
|
width => 1, |
|
47
|
|
|
|
|
|
|
height => 1, |
|
48
|
|
|
|
|
|
|
'*' => 0, # deny all others |
|
49
|
|
|
|
|
|
|
}, |
|
50
|
|
|
|
|
|
|
style => 0, |
|
51
|
|
|
|
|
|
|
script => 0, |
|
52
|
|
|
|
|
|
|
span => { |
|
53
|
|
|
|
|
|
|
id => 0, # blogger(?) includes spans with id attribute |
|
54
|
|
|
|
|
|
|
}, |
|
55
|
|
|
|
|
|
|
a => { |
|
56
|
|
|
|
|
|
|
href => 1, |
|
57
|
|
|
|
|
|
|
'*' => 0, |
|
58
|
|
|
|
|
|
|
}, |
|
59
|
|
|
|
|
|
|
); |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Definitions for HTML::Scrub |
|
62
|
0
|
|
|
|
|
|
my %scrub_def = ( |
|
63
|
|
|
|
|
|
|
'*' => 1, |
|
64
|
|
|
|
|
|
|
'href' => qr{^(?!(?:java)?script)}i, |
|
65
|
|
|
|
|
|
|
'src' => qr{^(?!(?:java)?script)}i, |
|
66
|
|
|
|
|
|
|
'cite' => '(?i-xsm:^(?!(?:java)?script))', |
|
67
|
|
|
|
|
|
|
'language' => 0, |
|
68
|
|
|
|
|
|
|
'name' => 1, |
|
69
|
|
|
|
|
|
|
'value' => 1, |
|
70
|
|
|
|
|
|
|
'onblur' => 0, |
|
71
|
|
|
|
|
|
|
'onchange' => 0, |
|
72
|
|
|
|
|
|
|
'onclick' => 0, |
|
73
|
|
|
|
|
|
|
'ondblclick' => 0, |
|
74
|
|
|
|
|
|
|
'onerror' => 0, |
|
75
|
|
|
|
|
|
|
'onfocus' => 0, |
|
76
|
|
|
|
|
|
|
'onkeydown' => 0, |
|
77
|
|
|
|
|
|
|
'onkeypress' => 0, |
|
78
|
|
|
|
|
|
|
'onkeyup' => 0, |
|
79
|
|
|
|
|
|
|
'onload' => 0, |
|
80
|
|
|
|
|
|
|
'onmousedown' => 0, |
|
81
|
|
|
|
|
|
|
'onmousemove' => 0, |
|
82
|
|
|
|
|
|
|
'onmouseout' => 0, |
|
83
|
|
|
|
|
|
|
'onmouseover' => 0, |
|
84
|
|
|
|
|
|
|
'onmouseup' => 0, |
|
85
|
|
|
|
|
|
|
'onreset' => 0, |
|
86
|
|
|
|
|
|
|
'onselect' => 0, |
|
87
|
|
|
|
|
|
|
'onsubmit' => 0, |
|
88
|
|
|
|
|
|
|
'onunload' => 0, |
|
89
|
|
|
|
|
|
|
'src' => 1, |
|
90
|
|
|
|
|
|
|
'type' => 1, |
|
91
|
|
|
|
|
|
|
'style' => 1, |
|
92
|
|
|
|
|
|
|
'class' => 0, |
|
93
|
|
|
|
|
|
|
'id' => 0, |
|
94
|
|
|
|
|
|
|
'frameborder' => 0, |
|
95
|
|
|
|
|
|
|
'border' => 0, |
|
96
|
|
|
|
|
|
|
); |
|
97
|
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
|
my $scrub = HTML::Scrubber->new; |
|
99
|
0
|
|
|
|
|
|
$scrub->rules(%scrub_rules); |
|
100
|
0
|
|
|
|
|
|
$scrub->default(1, \%scrub_def); |
|
101
|
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
return $scrub; |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
around 'clean_html' => sub { |
|
106
|
|
|
|
|
|
|
my $orig = shift; |
|
107
|
|
|
|
|
|
|
my ($self, $html) = @_; |
|
108
|
|
|
|
|
|
|
$html = $self->$orig($html); |
|
109
|
|
|
|
|
|
|
my $scrubbed = $self->scrubber->scrub($html); |
|
110
|
|
|
|
|
|
|
return $scrubbed; |
|
111
|
|
|
|
|
|
|
}; |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 AUTHOR |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Dave Cross, <dave@mag-sol.com> |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Copyright (c) 2010 by Magnum Solutions Ltd. |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
122
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.0 or, |
|
123
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
1; |