File Coverage

blib/lib/Perlanet.pm
Criterion Covered Total %
statement 40 128 31.2
branch 0 38 0.0
condition 0 27 0.0
subroutine 14 26 53.8
pod 9 9 100.0
total 63 228 27.6


line stmt bran cond sub pod time code
1             package Perlanet;
2              
3 7     7   10042 use strict;
  7         34  
  7         208  
4 7     7   53 use warnings;
  7         14  
  7         181  
5              
6 7     7   463 use Moose;
  7         379369  
  7         55  
7 7     7   49673 use namespace::autoclean;
  7         6107  
  7         52  
8              
9 7     7   478 use Carp;
  7         14  
  7         416  
10 7     7   3559 use DateTime::Duration;
  7         2955748  
  7         244  
11 7     7   64 use DateTime;
  7         17  
  7         154  
12 7     7   3298 use Perlanet::Entry;
  7         55  
  7         399  
13 7     7   3861 use Perlanet::Feed;
  7         33  
  7         297  
14 7     7   64 use Try::Tiny;
  7         15  
  7         437  
15 7     7   40 use URI::Fetch;
  7         16  
  7         135  
16 7     7   29 use XML::Feed;
  7         15  
  7         169  
17              
18 7     7   38 use vars qw{$VERSION};
  7         16  
  7         354  
19              
20             BEGIN {
21 7     7   8478 $VERSION = '1.1.0';
22             }
23              
24             with 'MooseX::Traits';
25              
26             $XML::Atom::ForceUnicode = 1;
27              
28             has 'ua' => (
29             is => 'rw',
30             isa => 'LWP::UserAgent',
31             lazy_build => 1
32             );
33              
34             sub _build_ua {
35 0     0     my $self = shift;
36 0           my $ua = LWP::UserAgent->new(
37             agent => "Perlanet/$VERSION"
38             );
39 0 0         $ua->show_progress(1) if -t STDOUT;
40 0           $ua->env_proxy;
41              
42 0           return $ua;
43             }
44              
45             has 'cutoff' => (
46             isa => 'DateTime',
47             is => 'ro',
48             default => sub {
49             DateTime->now + DateTime::Duration->new(weeks => 1);
50             }
51             );
52              
53             has 'entries' => (
54             isa => 'Int',
55             is => 'rw',
56             default => 10,
57             );
58              
59             has 'entries_per_feed' => (
60             isa => 'Int',
61             is => 'rw',
62             default => 5,
63             );
64              
65             has 'feeds' => (
66             isa => 'ArrayRef',
67             is => 'ro',
68             default => sub { [] }
69             );
70              
71             has 'author' => (
72             isa => 'HashRef',
73             is => 'ro',
74             );
75              
76             has $_ => (
77             isa => 'Str',
78             is => 'ro',
79             ) for qw( self_link title description url agent );
80              
81             has entry_sort_order => (
82             isa => 'Str',
83             is => 'ro',
84             default => 'modified',
85             );
86              
87             =head1 NAME
88              
89             Perlanet - A program for creating programs that aggregate web feeds (both
90             RSS and Atom).
91              
92             =head1 SYNOPSIS
93              
94             my $perlanet = Perlanet->new;
95             $perlanet->run;
96              
97             =head1 DESCRIPTION
98              
99             Perlanet is a program for creating programs that aggregate web feeds (both
100             RSS and Atom). Web pages like this are often called "Planets" after the Python
101             software which originally popularised them. Perlanet is a planet builder
102             written in Perl - hence "Perlanet".
103              
104             You are probably interested in L<Perlanet::Simple> to get started straight
105             out of the box, batteries included style.
106              
107             Perlanet itself is the driving force behind everything, however. Perlanet
108             reads a series of web feeds (filtering only those that are valid), sorts
109             and selects entries from these web feeds, and then creates a new aggregate
110             feed and renders this aggregate feed. Perlanet allows the user to customize
111             all of these steps through subclassing and roles.
112              
113             For most uses, you probably don't want to use the Perlanet module. The
114             L<perlanet> command line program is far more likely to be useful.
115              
116             =head1 CONSTRUCTOR
117              
118             =head2 new
119              
120             my $perlanet = Perlanet->new
121              
122             The constructor method. Can be passed a hashref of initialisers.
123              
124             See L</ATTRIBUTES> below for details of the key/value pairs to pass in.
125              
126             =head1 ATTRIBUTES
127              
128             =over
129              
130             =item ua
131              
132             An instance of L<LWP::UserAgent>. Defaults to a simple agent using C<<
133             $cfg->{agent} >> as the user agent name, or C< Perlanet/$VERSION >.
134              
135             =item cutoff
136              
137             An instance of L<DateTime> which represents the earliest date for
138             which feed posts will be fetched/shown.
139              
140             =item feeds
141              
142             An arrayref of L<Perlanet::Feed> objects representing the feeds to
143             collect data from.
144              
145             =back
146              
147             =head1 METHODS
148              
149             =head2 fetch_page
150              
151             Attempt to fetch a web page and a returns a L<URI::Fetch::Response> object.
152              
153             =cut
154              
155             sub fetch_page {
156 0     0 1   my ($self, $url) = @_;
157 0           return URI::Fetch->fetch(
158             $url,
159             UserAgent => $self->ua,
160             ForceResponse => 1,
161             );
162             }
163              
164             =head2 fetch_feeds
165              
166             Called internally by L</run> and passed the list of feeds in L</feeds>.
167              
168             Attempt to download all given feeds, as specified in the C<feeds> attribute.
169             Returns a list of L<Perlanet::Feed> objects, with the actual feed data
170             loaded.
171              
172             NB: This method also modifies the contents of L</feeds>.
173              
174             =cut
175              
176             sub fetch_feeds {
177 0     0 1   my ($self, $feeds) = @_;
178              
179 0           my @valid_feeds;
180 0           for my $feed (@$feeds) {
181 0           my $response = $self->fetch_page($feed->url);
182              
183 0 0         if ($response->is_error) {
184 0           carp 'Error retrieving ' . $feed->url;
185 0           carp $response->http_response->status_line;
186 0           next;
187             }
188              
189 0 0         unless (length $response->content) {
190 0           carp 'No data returned from ' . $feed->url;
191 0           next;
192             }
193              
194             try {
195 0     0     my $data = $response->content;
196 0           my $xml_feed = XML::Feed->parse(\$data);
197              
198 0           $feed->_xml_feed($xml_feed);
199 0 0         $feed->title($xml_feed->title) unless $feed->title;
200              
201 0           push @valid_feeds, $feed;
202             }
203             catch {
204 0     0     carp 'Errors parsing ' . $feed->url;
205 0 0         carp $_ if defined $_;
206 0           };
207             }
208              
209 0           return \@valid_feeds;
210             }
211              
212             =head2 select_entries
213              
214             Called internally by L</run> and passed the list of feeds from
215             L</fetch_feeds>.
216              
217             Returns a combined list of L<Perlanet::Entry> objects from all given feeds.
218              
219             =cut
220              
221             sub select_entries {
222 0     0 1   my ($self, $feeds) = @_;
223              
224 0           my @feed_entries;
225 0           for my $feed (@$feeds) {
226 0           my @entries = $feed->_xml_feed->entries;
227              
228 0           @entries = @{ $self->sort_entries(\@entries) };
  0            
229              
230 0 0 0       if ($self->entries_per_feed and @entries > $self->entries_per_feed) {
231 0           $#entries = $self->entries_per_feed - 1;
232             }
233              
234             push @feed_entries,
235             map {
236 0           $_->title($feed->title . ': ' . $_->title);
  0            
237              
238             # Problem with XML::Feed's conversion of RSS to Atom
239 0 0 0       if ($_->issued && ! $_->modified) {
240 0           $_->modified($_->issued);
241             }
242              
243             Perlanet::Entry->new(
244 0           _entry => $_,
245             feed => $feed
246             );
247             } @entries;
248             }
249              
250 0           return \@feed_entries;
251             }
252              
253             =head2 sort_entries
254              
255             Called internally by L</run> and passed the list of entries from
256             L</select_entries>.
257              
258             Sort the given list of entries into created/modified order for aggregation,
259             and filters them if necessary.
260              
261             Takes a list of L<Perlanet::Entry>s, and returns an ordered list.
262              
263             =cut
264              
265             sub sort_entries {
266 0     0 1   my ($self, $entries) = @_;
267 0           my $day_zero = DateTime->from_epoch(epoch => 0);
268              
269 0           my @entries;
270              
271 0 0         if ($self->entry_sort_order eq 'modified') {
    0          
272             @entries = grep {
273 0   0       ($_->modified || $_->issued || $day_zero) < $self->cutoff
274             } sort {
275 0   0       ($b->modified || $b->issued || $day_zero)
  0   0        
276             <=>
277             ($a->modified || $a->issued || $day_zero)
278             } @$entries;
279             } elsif ($self->entry_sort_order eq 'issued') {
280             @entries = grep {
281 0   0       ($_->issued || $_->modified || $day_zero) < $self->cutoff
282             } sort {
283 0   0       ($b->issued || $b->modified || $day_zero)
  0   0        
284             <=>
285             ($a->issued || $a->modified || $day_zero)
286             } @$entries;
287             } else {
288 0           die 'Invalid entry sort order: ' . $self->entry_sort_order;
289             }
290              
291             # Only need so many entries
292 0 0 0       if ($self->entries && @entries > $self->entries) {
293 0           $#entries = $self->entries - 1;
294             }
295              
296 0           return \@entries;
297             }
298              
299             =head2 build_feed
300              
301             Called internally by L</run> and passed the list of entries from
302             L</sort_entries>.
303              
304             Takes a list of L<Perlanet::Entry>s, and returns a L<Perlanet::Feed>
305             that is the actual feed for the planet.
306              
307             =cut
308              
309             sub build_feed {
310 0     0 1   my ($self, $entries) = @_;
311              
312 0           my $self_url = $self->self_link;
313              
314 0           my $f = Perlanet::Feed->new( modified => DateTime->now );
315 0 0         $f->title($self->title) if defined $self->title;
316 0 0         $f->url($self->url) if defined $self->url;
317 0 0         $f->description($self->description) if defined $self->description;
318 0 0         $f->author($self->author->{name}) if defined $self->author->{name};
319 0 0         $f->email($self->author->{email}) if defined $self->author->{email};
320 0 0         $f->self_link($self->url) if defined $self->url;
321 0 0         $f->id($self->url) if defined $self->url;
322              
323 0           $f->add_entry($_) for @$entries;
324              
325 0           return $f;
326             }
327              
328             =head2 clean_html
329              
330             Clean a HTML string so it is suitable for display.
331              
332             Takes a HTML string and returns a "cleaned" HTML string.
333              
334             =cut
335              
336             sub clean_html {
337 0     0 1   my ($self, $entry) = @_;
338 0           return $entry;
339             }
340              
341             =head2 clean_entries
342              
343             Clean all entries for the planet.
344              
345             Takes a list of entries, runs them through C<clean> and returns a list of
346             cleaned entries.
347              
348             =cut
349              
350             sub clean_entries {
351 0     0 1   my ($self, $entries) = @_;
352              
353 0           my @clean_entries;
354              
355 0           foreach (@$entries) {
356 0 0         if (my $body = $_->content->body) {
357 0           my $cleaned = $self->clean_html($body);
358 0           $_->content->body($cleaned);
359             }
360              
361 0 0         if (my $summary = $_->summary->body) {
362 0           my $cleaned = $self->clean_html($summary);
363 0           $_->summary->body($cleaned);
364             }
365              
366 0           push @clean_entries, $_;
367             }
368              
369 0           return \@clean_entries;
370             }
371              
372             =head2 render
373              
374             Called internally by L</run> and passed the feed from L</build_feed>.
375              
376             This is the hook where you generate some type of page to display the result
377             of aggregating feeds together (ie, inserting the posts into a database,
378             running a HTML templating library, etc)
379              
380             Takes a L<Perlanet::Feed> as input (as generated by L<build_feed>.
381              
382             =cut
383              
384             sub render {
385 0     0 1   my ($self, $feed) = @_;
386             }
387              
388             =head2 run
389              
390             The main method which runs the perlanet process.
391              
392             =cut
393              
394             sub run {
395 0     0 1   my $self = shift;
396              
397 0           my $feeds = $self->fetch_feeds($self->feeds);
398 0           my $selected = $self->select_entries($feeds);
399 0           my $sorted = $self->sort_entries($selected);
400 0           my $cleaned = $self->clean_entries($sorted);
401 0           my $feed = $self->build_feed($cleaned);
402              
403 0           $self->render($feed);
404             }
405              
406             =head1 TO DO
407              
408             See http://wiki.github.com/davorg/perlanet
409              
410             =head1 SUPPORT
411              
412             There is a mailing list which acts as both a place for developers to talk
413             about maintaining and improving Perlanet and also for users to get support.
414             You can sign up to this list at
415             L<http://lists.mag-sol.com/mailman/listinfo/perlanet>
416              
417             To report bugs in Perlanet, please use the CPAN request tracker. You can
418             either use the web page at
419             L<http://rt.cpan.org/Public/Bug/Report.html?Queue=Perlanet> or send an email
420             to bug-Perlanet@rt.cpan.org.
421              
422             =head1 SEE ALSO
423              
424             =over 4
425              
426             =item *
427              
428             L<perlanet>
429              
430             =item *
431              
432             L<Plagger>
433              
434             =back
435              
436             =head1 AUTHOR
437              
438             Dave Cross, <dave@mag-sol.com>
439              
440             =head1 COPYRIGHT AND LICENSE
441              
442             Copyright (c) 2010 by Magnum Solutions Ltd.
443              
444             This library is free software; you can redistribute it and/or modify
445             it under the same terms as Perl itself, either Perl version 5.10.0 or,
446             at your option, any later version of Perl 5 you may have available.
447              
448             =cut
449              
450             1;