| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package RSSycklr; |
|
2
|
2
|
|
|
2
|
|
56357
|
use Mouse; |
|
|
2
|
|
|
|
|
103933
|
|
|
|
2
|
|
|
|
|
13
|
|
|
3
|
2
|
|
|
2
|
|
712
|
no warnings "uninitialized"; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
92
|
|
|
4
|
2
|
|
|
2
|
|
10
|
use Carp ( qw/ carp confess croak / ); |
|
|
2
|
|
|
|
|
8
|
|
|
|
2
|
|
|
|
|
181
|
|
|
5
|
2
|
|
|
2
|
|
2187
|
use YAML (); |
|
|
2
|
|
|
|
|
26829
|
|
|
|
2
|
|
|
|
|
52
|
|
|
6
|
2
|
|
|
2
|
|
2640
|
use XML::Feed (); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use HTML::Truncate (); |
|
8
|
|
|
|
|
|
|
use HTML::TokeParser::Simple (); |
|
9
|
|
|
|
|
|
|
use XML::LibXML (); |
|
10
|
|
|
|
|
|
|
use DateTime (); |
|
11
|
|
|
|
|
|
|
use Scalar::Util "blessed"; |
|
12
|
|
|
|
|
|
|
use URI (); |
|
13
|
|
|
|
|
|
|
use File::ShareDir (); |
|
14
|
|
|
|
|
|
|
use Hash::Merge::Simple "merge"; |
|
15
|
|
|
|
|
|
|
use File::Spec; |
|
16
|
|
|
|
|
|
|
use Encode; |
|
17
|
|
|
|
|
|
|
use Capture::Tiny "capture"; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = "0.15"; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# Lookup table (tag name => 1) of the elements that are left in place when
# entry content is sanitized; _strip_tags unwraps every element not listed.
has keep_tags => (
    is      => "rw",
    isa     => "HashRef",
    default => sub {
        my %keep;
        $keep{$_} = 1 for qw(
            del ins i u b em
            strong abbr br img dfn
            acronym q sub sup cite
            code kbd samp var
            strike s tt a
        );
        return \%keep;
    },
);

# When true, feed fetch/parse failures are reported with carp (see next()).
has debug => (
    is      => "rw",
    isa     => "Bool",
    default => 0,
);

# Template Toolkit engine.  Lazy, so Template is only require()d the first
# time the attribute (or the delegated process() method) is used.
has tt2 => (
    is      => "ro",
    isa     => "Template",
    lazy    => 1,
    handles => ["process"],
    default => sub {
        require Template;
        return Template->new(
            {
                ENCODING         => 'UTF-8',
                DEFAULT_ENCODING => 'UTF-8',
            }
        );
    },
);
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# No type constraint, so this can hold anything Template->process accepts as
# its template argument.  The default is a reference to the inline template
# source below; it is built lazily because rendering is not always used.
#
# The template iterates rssycklr.feeds(), skips feeds without entries, and
# prints each entry's title plus -- when the first entry has a lede -- the
# lede and a formatted modification time.
#
# NOTE(review): this copy of the default template carries no list/anchor
# markup around the directives and looks like it lost markup in transit.
# Only the demonstrably unbalanced closing tag of the feed title has been
# repaired here (it was emitted as `h4>` instead of `</h4>`); confirm the
# remaining markup against the canonical template before relying on it.
has "template" =>
    is => "rw",
    lazy => 1, # not always used
    default => sub { \<<"TT_TEMPLATE";

[%-FOR feed IN rssycklr.feeds() %]
[%-NEXT UNLESS feed.count %]

<[% feed_title_tag || "h4" %]>
[%-FILTER html; feed.title_override || feed.title; END %]
</[% feed_title_tag || "h4" %]>
[%~IF feed.entries.0.lede %]

[%-FOR entry IN feed.entries %]
[%-entry.title | html %]

[% entry.lede %]
[% modified = entry.modified ? entry.modified : entry.feed.modified %]
[% modified.ymd(".") %] [% modified.hour_12 %]:[% modified.min | format('%02d') %][% modified.am_or_pm %]


[%~END %]

[%~ELSE %]

[%-FOR entry IN feed.entries %]
[%-entry.title | html %]
[%~END %]

[%~END %]

[%~END %]

TT_TEMPLATE
    },
    ;
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# libxml2 parser used to turn entry HTML into a DOM; parse_html_string is
# delegated to it.  Each option below is switched on through its setter, in
# this order.
has xml_parser => (
    is      => "rw",
    isa     => "XML::LibXML",
    handles => ["parse_html_string"],
    default => sub {
        my $parser = XML::LibXML->new();
        $parser->$_(1) for qw(
            keep_blanks
            line_numbers
            complete_attributes
            clean_namespaces
            no_network
            recover_silently
        );
        return $parser;
    },
);

# Parsed DTD used to validate sanitized entry content.  No default: next()
# fills it in the first time it needs one.
has dtd => (
    is  => "rw",
    isa => "XML::LibXML::Dtd",
);

# Excerpt truncater; truncate() is delegated to it.  Typed as a plain Object
# so a compatible replacement for HTML::Truncate can be supplied.
has truncater => (
    is      => "rw",
    isa     => "Object",    # "HTML::Truncate",
    handles => ["truncate"],
    default => sub {
        return HTML::Truncate->new(
            repair   => 1,
            on_space => 1,
            chars    => 170,
        );
    },
);

# Feeds that have already been fetched.  auto_deref makes the reader return
# a list in list context and the array reference in scalar context.
has feeds => (
    is         => "ro",
    isa        => "ArrayRef",
    auto_deref => 1,
    default    => sub { [] },
);
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# Before the feeds reader runs, pull every remaining feed through next() and
# append it to the stored list, so the reader always sees everything that
# could be fetched.
before feeds => sub {
    my ($rssycklr) = @_;

    for ( ;; ) {
        my $fetched = $rssycklr->next or last;
        push @{ $rssycklr->{feeds} }, $fetched;
    }
};
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# Constructor hook: hands the "config" and "load_config" constructor
# arguments (when true) to the methods of the same name, in that order, and
# removes them from the argument hash.
sub BUILD {
    my ( $self, $args ) = @_;

    for my $method (qw( config load_config )) {
        next unless $args->{$method};
        $self->$method( delete $args->{$method} );
    }
}
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Combined getter/merger for the configuration hash.
#
# The configuration starts out as _default_config().  Called without a true
# argument it returns the current hash reference; called with a hash
# reference it merges that on top of the current settings (via
# Hash::Merge::Simple::merge), stores the result and returns it.
sub config {
    my ( $self, $overrides ) = @_;

    $self->{_config} = $self->_default_config() unless $self->{_config};
    return $self->{_config} unless $overrides;

    return $self->{_config} = merge( $self->{_config}, $overrides );
}
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# Load configuration and feed definitions from one source.
#
# $src may be:
#   * a hash reference, used as the configuration data directly;
#   * a string without a newline, passed to YAML::LoadFile as a file name;
#   * any other string, passed to YAML::Load as YAML text.
#
# The "feeds" key (an array reference of feed records) is split off and given
# to add_feeds(); everything else is merged into the settings via config().
#
# Returns the object itself, or nothing when $src is false.
sub load_config {
    my ( $self, $src ) = @_;
    return unless $src;

    my $loaded
        = ref($src)    ? $src
        : $src !~ /\n/ ? YAML::LoadFile($src)
        :                YAML::Load($src);

    # Work on a shallow copy: removing "feeds" from the structure the caller
    # handed in would silently alter their data.
    my %settings = %{$loaded};
    my $feeds    = delete $settings{feeds} || [];

    $self->config( \%settings );
    $self->add_feeds($feeds);
    return $self;
}
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Append feed records to the pending queue held in config->{feeds}.
#
# $feeds is an array reference of hash references, each of which must carry
# a true "uri"; an undefined $feeds is treated as an empty list.  Every
# record is checked before anything is queued, so a bad record (which raises
# confess with a dump of the record) never leaves a half-applied list.
#
# Returns true when the queue grew by exactly the number of records given.
sub add_feeds {
    my ( $self, $feeds ) = @_;
    $feeds ||= [];

    for my $info ( @{$feeds} ) {
        confess "URI is missing from feed data for feed: ", YAML::Dump($info)
            unless $info->{uri};
    }

    my $queue    = $self->config->{feeds} ||= [];
    my $expected = @{$queue} + @{$feeds};
    push @{$queue}, @{$feeds};

    return $expected == @{$queue};
}
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Render all feeds through the configured template.
#
# The template receives this object as "rssycklr".  When a return value is
# wanted the rendered text is returned; in void context it is printed to the
# currently selected filehandle and 1 is returned.  A template failure is
# raised with confess, carrying the Template error.
sub as_string {
    my ($self) = @_;

    my $rendered = "";
    my $ok = $self->process( $self->template, { rssycklr => $self }, \$rendered );
    confess $self->tt2->error() unless $ok;

    return $rendered if defined wantarray;

    print $rendered;
    return 1;
}
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Fetch, filter and sanitize the next configured feed.
#
# Takes the first record off the config->{feeds} queue, downloads and parses
# it with XML::Feed under an alarm() timeout, and keeps only entries issued
# inside the "hours_back" window.  Unless "title_only" is set, each kept
# entry also gets a "lede": its content converted to a DOM, stripped of
# unwanted attributes and tags, validated against the configured DTD, and
# truncated to "excerpt_length" with a "read more" link appended.
#
# timeout, hours_back, max_display, excerpt_length and title_only are read
# from the feed record first and from the global config second.
#
# Feeds that cannot be fetched or parsed, whose own modification time falls
# outside the window, or that end up with no usable entries are skipped by
# recursing to the following record.
#
# Returns an RSSycklr::Feed, or nothing when the queue is empty or the
# "max_feeds" limit has been reached (the queue is then emptied).
sub next {
    my $self = shift;

    if ( $self->_maxed_out ) {
        $self->config->{feeds} = [];
        return;
    }

    my $info = shift( @{ $self->config->{feeds} } )
        or return;

    # URI->new hands back objects of URI subclasses (URI::http, ...), so the
    # class name is never literally "URI"; ask isa() instead.
    my $uri = ( blessed( $info->{uri} ) and $info->{uri}->isa("URI") )
        ? $info->{uri}
        : URI->new( $info->{uri} );

    my $xml_feed;
    my $ok = eval {
        local $SIG{ALRM} = sub { die "Feed request timeout: $uri\n" };
        alarm( $info->{timeout} || $self->config->{timeout} || 10 );
        $xml_feed = XML::Feed->parse($uri)
            or croak( "Could not parse $uri, ", XML::Feed->errstr );
        alarm(0);
        1;
    };
    my $error = $@;    # keep it before anything else can reset $@
    alarm(0); # Racing parsing fatals can happen in the XML::Feed space(?).
    unless ($ok) {
        carp $error || ( "Unknown error parsing " . $info->{uri} )
            if $self->debug;
        return $self->next;
    }

    # Cut-off instant: anything not strictly newer than this is ignored.
    my $hours_back = DateTime
        ->now( time_zone => 'floating' )
        ->subtract( hours => $info->{hours_back} || $self->config->{hours_back} || 170 );

    if ( $xml_feed->modified ) {
        return $self->next
            unless 1 == DateTime->compare( $xml_feed->modified, $hours_back );
    }

    my $max_display    = $info->{max_display}    || $self->config->{max_display} || 10;
    my $excerpt_length = $info->{excerpt_length} || $self->config->{excerpt_length};

    # exists(), not truth: a feed may set title_only to undef on purpose to
    # override a true global setting.
    my $title_only = exists( $info->{title_only} )
        ? $info->{title_only}
        : $self->config->{title_only};

    my @entry;
  ENTRY:
    for my $entry ( $xml_feed->entries ) {
        next ENTRY unless $entry->issued;
        next ENTRY unless 1 == DateTime->compare( $entry->issued, $hours_back );

        my %entry;
        unless ($title_only) {
            next ENTRY if $entry->content->body !~ /\S/;
            my $xhtml = $self->html_to_dom( $entry->content->body )
                or die "Couldn't parse ", $entry->content->body;

            # Order matters: the later steps look up nodes in the tree that
            # the stripping passes leave behind.
            $self->_strip_attributes($xhtml);
            $self->_strip_tags($xhtml);
            $self->_handle_images( $xhtml, $entry );

            my ($body) = $xhtml->findnodes("body");
            unless ( $xhtml->findnodes("head") ) {
                # Give the document a minimal <head><title> before validating.
                my $head  = $xhtml->createElement("head");
                my $title = $xhtml->createElement("title");
                my $text  = $xhtml->createTextNode( __PACKAGE__ . "/" . $VERSION );
                $title->appendChild($text);
                $head->appendChild($title);
                $xhtml->insertBefore( $head, $body );
            }

            # Cache it.
            unless ( $self->dtd ) {
                $self->config->{dtd} ||= "xhtml1-transitional.dtd";
                my $dtd_file = File::ShareDir::dist_file( __PACKAGE__,
                                                          $self->config->{dtd} );
                my $dtd_source = do {
                    # Slurp mode for this read only; assigning to $/ without
                    # local would change line reading for the whole process.
                    local $/;
                    open my $fh, "<", $dtd_file
                        or croak "Couldn't open '$dtd_file' for reading: $!";
                    my $slurped = <$fh>;
                    close $fh or carp "Trouble closing '$dtd_file': $!";
                    $slurped;
                };
                # The raw DTD text is kept on the object under the DTD's name.
                $self->{ $self->config->{dtd} } = $dtd_source;
                $self->dtd( XML::LibXML::Dtd->parse_string($dtd_source) );
            }

            unless ( eval { $xhtml->validate( $self->dtd ); 1; } ) {
                carp $@ || "Uknown error",
                    " - parsing content of '", $entry->title,
                    "' from ", $xml_feed->link;
                next ENTRY;
            }

            my $content = "";
            $content .= $_->serialize(1) for $body->childNodes();

            # NOTE(review): the opening and closing anchor literals were
            # truncated in this copy of the file; they are rebuilt here as
            # the minimal markup that balances the surviving '">' fragment.
            # Confirm extra attributes/whitespace against the canonical
            # source.  The link is inserted unescaped, as before.
            my $more = join( "",
                             decode_utf8( $self->config->{ellipsis} ),
                             '<a href="',
                             $entry->link,
                             '">',
                             decode_utf8( $self->config->{read_more} ),
                             '</a>',
                );
            my $output = $self->truncate( $content,
                                          $excerpt_length,
                                          $more );
            $output =~ s/\s\s+/ /g;
            $entry{lede} = $output;
        }
        $entry{xml_feed_entry} = $entry;
        $entry{feed}           = $xml_feed;
        push @entry, \%entry;
        last ENTRY if @entry >= $max_display;
    }

    return $self->next unless @entry;

    my $feed = RSSycklr::Feed->new( %{$info},
                                    ellipsis => $self->config->{ellipsis}, # not sure, weak ref to parent instead?
                                    xml_feed => $xml_feed, );

    # Each entry points back at the wrapping feed object (not the raw
    # XML::Feed) once that object exists.
    $feed->{entries} = [ map { $_->{feed} = $feed; RSSycklr::Feed::Entry->new($_) } @entry ];

    $self->{_feeds_returned}++;
    return $feed;
}
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
# Turn an HTML fragment into an XML::LibXML document.
#
# The fragment is first re-tokenized with HTML::TokeParser::Simple:
#   * text, and any token whose tag is not listed in %HTML::Tagset::isKnown,
#     is entity-decoded and has runs of non-printable characters replaced by
#     a single space;
#   * <br> tags become a newline;
#   * every other tag is copied through unchanged.
# The cleaned text is then wrapped in a minimal HTML document and parsed
# with the delegated parse_html_string.
#
# Returns the document, or nothing when $html is false.
#
# NOTE(review): %HTML::Tagset::isKnown and HTML::Entities::decode_entities
# are referenced by full name without being loaded in this package --
# presumably another dependency loads them; verify.
sub html_to_dom {
    my ( $self, $html ) = @_;
    return unless $html;

    my $renew  = "";
    my $parser = HTML::TokeParser::Simple->new( \$html );

    # get_tag yields nothing for non-tag tokens; don't warn about that.
    no warnings "uninitialized";
    while ( my $token = $parser->get_token ) {
        if ( $token->is_text
             or not $HTML::Tagset::isKnown{ $token->get_tag } )
        {
            my $txt = HTML::Entities::decode_entities( $token->as_is );
            $txt =~ s/[^[:print:]]+/ /g; # kill unprintables for a space.
            $renew .= $txt;
        }
        elsif ( $token->get_tag =~ /\Abr\b/i ) {
            $renew .= "\n";
        }
        else {
            # Block-like tags used to have a branch of their own here that
            # did exactly the same thing; replacing them with newlines was
            # only ever described in a comment, never implemented.
            $renew .= $token->as_is;
        }
    }

    # XML::LibXML is noisy even with recover_silently, so-
    # whatever the parser prints to STDOUT/STDERR is captured and dropped.
    #
    # NOTE(review): the wrapper markup around "Untitled" and $renew was lost
    # in this copy of the file; it is rebuilt as a minimal document with a
    # title and a body.  Confirm against the canonical source.
    my $dom;
    my ( $stdout, $stderr ) = capture {
        $dom = $self->parse_html_string(<<"HTML");
<html><head><title>Untitled</title></head><body>$renew</body></html>
HTML
    };
    return $dom;
}
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# True (1) once the number of feeds handed out by next() has reached the
# configured "max_feeds"; otherwise returns nothing.  A false "max_feeds"
# means there is no limit.
sub _maxed_out {
    my ($self) = @_;

    my $limit = $self->config->{max_feeds};
    return 1 if $limit and $limit <= $self->{_feeds_returned};
    return;
}
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# Remove every attribute from every element under $root, except:
#   * href  on <a>
#   * src   on <img>
#   * title on <acronym>, <abbr>, <dfn> and <a>
sub _strip_attributes {
    my ( $self, $root ) = @_;

    # attribute name => pattern of element names allowed to keep it
    my %kept_on = (
        href  => qr/\Aa\z/,
        src   => qr/\Aimg\z/,
        title => qr/\A(?:acronym|abbr|dfn|a)\z/,
    );

    for my $element ( $root->findnodes("//*") ) {
        my $tag = $element->nodeName;
        for my $attribute ( $element->attributes ) {
            my $name    = $attribute->name;
            my $allowed = $kept_on{$name};
            next if $allowed and $tag =~ $allowed;

            $element->removeAttribute($name);
        }
    }
}
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# Clean up the images in an entry's DOM.
#
# Every <img> whose src is missing or is not an absolute http:// or https://
# URL is removed.  The first remaining image that is not already inside an
# <a> is wrapped in a link to the entry, titled with the entry title, and
# given the title as its alt text; only one image is linked ("Just do one for
# now").
#
# Earlier behaviour stopped at the first non-http image, leaving any later
# images unprocessed, and threw away https images; both are corrected here.
#
# Returns 1 when an image was linked, otherwise nothing.
sub _handle_images {
    my ( $self, $root, $entry ) = @_;

    my $linked;
    for my $node ( $root->findnodes("//img") ) {
        my $src = $node->getAttribute("src");
        unless ( defined $src and $src =~ m{\Ahttps?://}i ) {
            $node->parentNode->removeChild($node);
            next;
        }

        next if $linked;

        # Don't put a link on images that already have one.
        next if $node->parentNode->tagName eq "a";

        my $link = $node->getOwner->createElement("a");
        $link->setAttribute( "href",  $entry->link );
        $link->setAttribute( "title", $entry->title );
        $node->setAttribute( "alt", $entry->title );

        # The link receives a copy of the image, then takes the original's
        # place in the tree.
        $link->appendChild( $node->cloneNode );
        $node->parentNode->replaceChild( $link, $node );
        $linked = 1;
    }

    return 1 if $linked;
    return;
}
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
# Unwrap every element under $root that is not in keep_tags: the element is
# replaced by its own children, so text and permitted descendants survive.
# <body> is always kept.  When <br> is a permitted tag, leading and trailing
# <br> elements directly under <body> are then removed.
#
# Always returns 1.
sub _strip_tags {
    my ( $self, $root ) = @_;

    my $doc = $root->getOwnerDocument;

    # Special case, we must have body and don't want it mucking the
    # interface -- so it goes into a private copy rather than into the
    # caller-visible keep_tags hash.
    my %keep = ( %{ $self->keep_tags }, body => 1 );

    for my $node ( $root->findnodes("//*") ) {
        next unless $node;
        next if $keep{ $node->nodeName };

        my $frag = $doc->createDocumentFragment();
        $frag->appendChild($_) for $node->childNodes;
        $node->replaceNode($frag);
    }

    return 1 unless $keep{br};

    my @outer = $root->findnodes("body/*");

  FORWARD:
    for my $br (@outer) {
        last FORWARD unless $br and $br->tagName eq "br";
        $br->parentNode->removeChild($br);
    }

  BACKWARD:
    for my $br ( reverse @outer ) {
        last BACKWARD unless $br and $br->tagName eq "br";

        # A <br> without a parent was already detached by the forward pass,
        # which means everything before it is gone too: nothing left to trim.
        my $parent = $br->parentNode or last BACKWARD;
        $parent->removeChild($br);
    }
    return 1;
}
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# Build a fresh hash reference holding the built-in settings; config() uses
# it as the starting point that user settings are merged onto.
sub _default_config {
    my %defaults = (
        excerpt_length => 150,
        ellipsis       => "\x{2026}",    # chr(8230),
        read_more      => "[more]",
        title_only     => undef,
        hours_back     => 72,
        max_feeds      => 10,
        max_display    => 3,
        timeout        => 30,
        css_class      => "rssycklr",
        feed_title_tag => "h4",
        dtd            => "xhtml1-transitional.dtd",
    );

    # Settings that have been considered but are not supported:
    #   max_entries   => 10
    #   title_length  => undef
    #   excerpt_style => dl|p|br|ul
    #   title_style   => ul|p|br    (not implemented, ul/li happens now)
    #   max_images    => 1          (this is hardcoded for now)

    return \%defaults;
}
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable(); |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# One fetched feed: wraps the parsed XML::Feed object together with the
# entries selected for display.
package RSSycklr::Feed;
use Mouse;
use HTML::Entities qw( decode_entities );
use Encode qw( decode_utf8 );

# The underlying parsed feed; the listed readers are delegated to it.
has xml_feed => (
    is       => "ro",
    isa      => "Object",
    required => 1,
    handles  => [
        qw(
            tagline link copyright modified
            author generator language
        )
    ],
);

# Entries chosen for display.  auto_deref: a list in list context, the array
# reference in scalar context.
has entries => (
    is         => "ro",
    isa        => "ArrayRef",
    lazy       => 1,
    required   => 1,
    auto_deref => 1,
    default    => sub { [] },
);

# Optional replacement for the feed's own title; empty string when unset.
has title_override => (
    is      => "ro",
    isa     => "Str",
    default => "",
);

# Number of entries held by this feed.
sub count {
    my ($self) = @_;

    my $entries = $self->entries;    # scalar context: the array reference
    return scalar @{$entries};
}

# The feed title with HTML entities decoded (twice, to undo double encoding)
# and then passed through decode_utf8.  The result is cached on the object
# once it is a true value.
sub title {
    my ($self) = @_;

    unless ( $self->{_title} ) {
        # Try to guarantee it doesn't return entities.
        $self->{_title} = decode_entities( decode_entities( $self->xml_feed->title ) );
        $self->{_title} = decode_utf8( $self->{_title} );
    }
    return $self->{_title};
}

__PACKAGE__->meta->make_immutable();
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# One displayable feed entry: the parsed entry object plus the excerpt built
# for it and a back-reference to the owning feed.
package RSSycklr::Feed::Entry;
use Mouse;
use DateTime;

# The underlying parsed entry (::Atom/RSS); the listed readers are delegated
# to it.
has xml_feed_entry => (
    is       => "ro",
    isa      => "Object",
    required => 1,
    handles  => [
        qw(
            title link content category
            id author issued modified
        )
    ],
);

# Sanitized, truncated excerpt of the entry content; empty string when none
# was generated.
has lede => (
    is      => "ro",
    isa     => "Str",
    default => "",
);

# Owning feed.  Held weakly so feed and entries do not keep each other alive.
has feed => (
    is       => "ro",
    isa      => "RSSycklr::Feed",
    weak_ref => 1,
);

__PACKAGE__->meta->make_immutable();
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
1; |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
__END__ |