File Coverage

blib/lib/Perlanet.pm
Criterion Covered Total %
statement 91 120 75.8
branch 12 34 35.2
condition 1 18 5.5
subroutine 24 26 92.3
pod 9 9 100.0
total 137 207 66.1


line stmt bran cond sub pod time code
1             package Perlanet;
2              
3 6     6   9212 use strict;
  6         14  
  6         182  
4 6     6   41 use warnings;
  6         14  
  6         162  
5              
6 6     6   438 use Moose;
  6         468625  
  6         51  
7 6     6   44163 use namespace::autoclean;
  6         9630  
  6         71  
8              
9 6     6   574 use Carp;
  6         17  
  6         478  
10 6     6   3655 use DateTime::Duration;
  6         2898421  
  6         230  
11 6     6   58 use DateTime;
  6         15  
  6         120  
12 6     6   2830 use Perlanet::Entry;
  6         56  
  6         301  
13 6     6   2988 use Perlanet::Feed;
  6         29  
  6         245  
14 6     6   56 use Try::Tiny;
  6         14  
  6         392  
15 6     6   41 use URI::Fetch;
  6         13  
  6         122  
16 6     6   72 use XML::Feed;
  6         15  
  6         144  
17              
18 6     6   32 use vars qw{$VERSION};
  6         14  
  6         344  
19              
20             BEGIN {
21 6     6   6628 $VERSION = '1.0.0';
22             }
23              
24             with 'MooseX::Traits';
25              
26             $XML::Atom::ForceUnicode = 1;
27              
28             has 'ua' => (
29             is => 'rw',
30             isa => 'LWP::UserAgent',
31             lazy_build => 1
32             );
33              
34             sub _build_ua {
35 4     4   14 my $self = shift;
36 4         62 my $ua = LWP::UserAgent->new(
37             agent => "Perlanet/$VERSION"
38             );
39 4 50       12994 $ua->show_progress(1) if -t STDOUT;
40 4         34 $ua->env_proxy;
41              
42 4         15375 return $ua;
43             }
44              
45             has 'cutoff' => (
46             isa => 'DateTime',
47             is => 'ro',
48             default => sub {
49             DateTime->now + DateTime::Duration->new(weeks => 1);
50             }
51             );
52              
53             has 'entries' => (
54             isa => 'Int',
55             is => 'rw',
56             default => 10,
57             );
58              
59             has 'entries_per_feed' => (
60             isa => 'Int',
61             is => 'rw',
62             default => 5,
63             );
64              
65             has 'feeds' => (
66             isa => 'ArrayRef',
67             is => 'ro',
68             default => sub { [] }
69             );
70              
71             has 'author' => (
72             isa => 'HashRef',
73             is => 'ro',
74             );
75              
76             has $_ => (
77             isa => 'Str',
78             is => 'ro',
79             ) for qw( self_link title description url agent );
80              
81             =head1 NAME
82              
83             Perlanet - A program for creating programs that aggregate web feeds (both
84             RSS and Atom).
85              
86             =head1 SYNOPSIS
87              
88             my $perlanet = Perlanet->new;
89             $perlanet->run;
90              
91             =head1 DESCRIPTION
92              
93             Perlanet is a program for creating programs that aggregate web feeds (both
94             RSS and Atom). Web pages like this are often called "Planets" after the Python
95             software which originally popularised them. Perlanet is a planet builder
96             written in Perl - hence "Perlanet".
97              
98             You are probably interested in L<Perlanet::Simple> to get started straight
99             out of the box, batteries included style.
100              
101             Perlanet itself is the driving force behind everything, however. Perlanet
102             reads a series of web feeds (filtering only those that are valid), sorts
103             and selects entries from these web feeds, and then creates a new aggregate
104             feed and renders this aggregate feed. Perlanet allows the user to customize
105             all of these steps through subclassing and roles.
106              
107             For most uses, you probably don't want to use the Perlanet module. The
108             L<perlanet> command line program is far more likely to be useful.
109              
110             =head1 CONSTRUCTOR
111              
112             =head2 new
113              
114             my $perlanet = Perlanet->new
115              
116             The constructor method. Can be passed a hashref of initialisers.
117              
118             See L</ATTRIBUTES> below for details of the key/value pairs to pass in.
119              
120             =head1 ATTRIBUTES
121              
122             =over
123              
124             =item ua
125              
126             An instance of L<LWP::UserAgent>. Defaults to a simple agent using C<<
127             $cfg->{agent} >> as the user agent name, or C< Perlanet/$VERSION >.
128              
129             =item cutoff
130              
131             An instance of L<DateTime> which represents the earliest date for
132             which feed posts will be fetched/shown.
133              
134             =item feeds
135              
136             An arrayref of L<Perlanet::Feed> objects representing the feeds to
137             collect data from.
138              
139             =back
140              
141             =head1 METHODS
142              
143             =head2 fetch_page
144              
145             Attempt to fetch a web page and a returns a L<URI::Fetch::Response> object.
146              
147             =cut
148              
149             sub fetch_page {
150 0     0 1 0 my ($self, $url) = @_;
151 0         0 return URI::Fetch->fetch(
152             $url,
153             UserAgent => $self->ua,
154             ForceResponse => 1,
155             );
156             }
157              
158             =head2 fetch_feeds
159              
160             Called internally by L</run> and passed the list of feeds in L</feeds>.
161              
162             Attempt to download all given feeds, as specified in the C<feeds> attribute.
163             Returns a list of L<Perlanet::Feed> objects, with the actual feed data
164             loaded.
165              
166             NB: This method also modifies the contents of L</feeds>.
167              
168             =cut
169              
170             sub fetch_feeds {
171 5     5 1 20 my ($self, $feeds) = @_;
172              
173 5         15 my @valid_feeds;
174 5         22 for my $feed (@$feeds) {
175 6         1763 my $response = $self->fetch_page($feed->url);
176              
177 6 50       123998 if ($response->is_error) {
178 0         0 carp 'Error retrieving ' . $feed->url;
179 0         0 carp $response->http_response->status_line;
180 0         0 next;
181             }
182              
183 6 50       253 unless (length $response->content) {
184 0         0 carp 'No data returned from ' . $feed->url;
185 0         0 next;
186             }
187              
188             try {
189 6     6   305 my $data = $response->content;
190 6         242 my $xml_feed = XML::Feed->parse(\$data);
191              
192 6         830 $feed->_xml_feed($xml_feed);
193 0 0       0 $feed->title($xml_feed->title) unless $feed->title;
194              
195 0         0 push @valid_feeds, $feed;
196             }
197             catch {
198 6     6   235860 carp 'Errors parsing ' . $feed->url;
199 6 50       1041 carp $_ if defined $_;
200 6         131 };
201             }
202              
203 5         7143 return \@valid_feeds;
204             }
205              
206             =head2 select_entries
207              
208             Called internally by L</run> and passed the list of feeds from
209             L</fetch_feeds>.
210              
211             Returns a combined list of L<Perlanet::Entry> objects from all given feeds.
212              
213             =cut
214              
215             sub select_entries {
216 5     5 1 5084 my ($self, $feeds) = @_;
217              
218 5         10 my @feed_entries;
219 5         14 for my $feed (@$feeds) {
220 0         0 my @entries = $feed->_xml_feed->entries;
221              
222 0 0 0     0 if ($self->entries_per_feed and @entries > $self->entries_per_feed) {
223 0         0 $#entries = $self->entries_per_feed - 1;
224             }
225              
226             push @feed_entries,
227             map {
228 0         0 $_->title($feed->title . ': ' . $_->title);
  0         0  
229              
230             # Problem with XML::Feed's conversion of RSS to Atom
231 0 0 0     0 if ($_->issued && ! $_->modified) {
232 0         0 $_->modified($_->issued);
233             }
234              
235             Perlanet::Entry->new(
236 0         0 _entry => $_,
237             feed => $feed
238             );
239             } @entries;
240             }
241              
242 5         17 return \@feed_entries;
243             }
244              
245             =head2 sort_entries
246              
247             Called internally by L</run> and passed the list of entries from
248             L</select_entries>.
249              
250             Sort the given list of entries into created/modified order for aggregation,
251             and filters them if necessary.
252              
253             Takes a list of L<Perlanet::Entry>s, and returns an ordered list.
254              
255             =cut
256              
257             sub sort_entries {
258 3     3 1 3944 my ($self, $entries) = @_;
259 3         42 my $day_zero = DateTime->from_epoch(epoch => 0);
260              
261             my @entries = grep {
262 0   0     0 ($_->issued || $_->modified || $day_zero) < $self->cutoff
263             } sort {
264 3   0     1351 ($b->modified || $b->issued || $day_zero)
  0   0     0  
265             <=>
266             ($a->modified || $a->issued || $day_zero)
267             } @$entries;
268              
269             # Only need so many entries
270 3 50 33     114 if ($self->entries && @entries > $self->entries) {
271 0         0 $#entries = $self->entries - 1;
272             }
273              
274 3         20 return \@entries;
275             }
276              
277             =head2 build_feed
278              
279             Called internally by L</run> and passed the list of entries from
280             L</sort_entries>.
281              
282             Takes a list of L<Perlanet::Entry>s, and returns a L<Perlanet::Feed>
283             that is the actual feed for the planet.
284              
285             =cut
286              
287             sub build_feed {
288 3     3 1 3723 my ($self, $entries) = @_;
289              
290 3         98 my $self_url = $self->self_link;
291              
292 3         38 my $f = Perlanet::Feed->new( modified => DateTime->now );
293 3 50       82 $f->title($self->title) if defined $self->title;
294 3 50       89 $f->url($self->url) if defined $self->url;
295 3 50       94 $f->description($self->description) if defined $self->description;
296 3 50       113 $f->author($self->author->{name}) if defined $self->author->{name};
297 3 50       71 $f->email($self->author->{email}) if defined $self->author->{email};
298 3 50       77 $f->self_link($self->url) if defined $self->url;
299 3 50       73 $f->id($self->url) if defined $self->url;
300              
301 3         14 $f->add_entry($_) for @$entries;
302              
303 3         14 return $f;
304             }
305              
306             =head2 clean_html
307              
308             Clean a HTML string so it is suitable for display.
309              
310             Takes a HTML string and returns a "cleaned" HTML string.
311              
312             =cut
313              
314             sub clean_html {
315 0     0 1 0 my ($self, $entry) = @_;
316 0         0 return $entry;
317             }
318              
319             =head2 clean_entries
320              
321             Clean all entries for the planet.
322              
323             Takes a list of entries, runs them through C<clean> and returns a list of
324             cleaned entries.
325              
326             =cut
327              
328             sub clean_entries {
329 3     3 1 3819 my ($self, $entries) = @_;
330              
331 3         8 my @clean_entries;
332              
333 3         12 foreach (@$entries) {
334 0 0       0 if (my $body = $_->content->body) {
335 0         0 my $cleaned = $self->clean_html($body);
336 0         0 $_->content->body($cleaned);
337             }
338              
339 0 0       0 if (my $summary = $_->summary->body) {
340 0         0 my $cleaned = $self->clean_html($summary);
341 0         0 $_->summary->body($cleaned);
342             }
343              
344 0         0 push @clean_entries, $_;
345             }
346              
347 3         11 return \@clean_entries;
348             }
349              
350             =head2 render
351              
352             Called internally by L</run> and passed the feed from L</build_feed>.
353              
354             This is the hook where you generate some type of page to display the result
355             of aggregating feeds together (ie, inserting the posts into a database,
356             running a HTML templating library, etc)
357              
358             Takes a L<Perlanet::Feed> as input (as generated by L<build_feed>.
359              
360             =cut
361              
362             sub render {
363 1     1 1 37 my ($self, $feed) = @_;
364             }
365              
366             =head2 run
367              
368             The main method which runs the perlanet process.
369              
370             =cut
371              
372             sub run {
373 1     1 1 14 my $self = shift;
374              
375 1         42 my $feeds = $self->fetch_feeds($self->feeds);
376 1         10 my $selected = $self->select_entries($feeds);
377 1         9 my $sorted = $self->sort_entries($selected);
378 1         8 my $cleaned = $self->clean_entries($sorted);
379 1         6 my $feed = $self->build_feed($cleaned);
380              
381 1         9 $self->render($feed);
382             }
383              
384             =head1 TO DO
385              
386             See http://wiki.github.com/davorg/perlanet
387              
388             =head1 SUPPORT
389              
390             There is a mailing list which acts as both a place for developers to talk
391             about maintaining and improving Perlanet and also for users to get support.
392             You can sign up to this list at
393             L<http://lists.mag-sol.com/mailman/listinfo/perlanet>
394              
395             To report bugs in Perlanet, please use the CPAN request tracker. You can
396             either use the web page at
397             L<http://rt.cpan.org/Public/Bug/Report.html?Queue=Perlanet> or send an email
398             to bug-Perlanet@rt.cpan.org.
399              
400             =head1 SEE ALSO
401              
402             =over 4
403              
404             =item *
405              
406             L<perlanet>
407              
408             =item *
409              
410             L<Plagger>
411              
412             =back
413              
414             =head1 AUTHOR
415              
416             Dave Cross, <dave@mag-sol.com>
417              
418             =head1 COPYRIGHT AND LICENSE
419              
420             Copyright (c) 2010 by Magnum Solutions Ltd.
421              
422             This library is free software; you can redistribute it and/or modify
423             it under the same terms as Perl itself, either Perl version 5.10.0 or,
424             at your option, any later version of Perl 5 you may have available.
425              
426             =cut
427              
428             1;