File Coverage

blib/lib/Perlanet.pm
Criterion Covered Total %
statement 90 119 75.6
branch 12 34 35.2
condition 1 18 5.5
subroutine 23 25 92.0
pod 9 9 100.0
total 135 205 65.8


line stmt bran cond sub pod time code
1             package Perlanet;
2              
3 6     6   8289 use strict;
  6         8  
  6         151  
4 6     6   18 use warnings;
  6         22  
  6         114  
5              
6 6     6   53 use 5.6.0;
  6         13  
7             our $VERSION = '0.58';
8              
9 6     6   435 use Moose;
  6         291725  
  6         32  
10 6     6   26504 use namespace::autoclean;
  6         5953  
  6         31  
11              
12 6     6   308 use Carp;
  6         6  
  6         292  
13 6     6   2731 use DateTime::Duration;
  6         1739213  
  6         178  
14 6     6   43 use DateTime;
  6         10  
  6         91  
15 6     6   2653 use Perlanet::Entry;
  6         19  
  6         564  
16 6     6   2817 use Perlanet::Feed;
  6         19  
  6         233  
17 6     6   39 use Try::Tiny;
  6         9  
  6         353  
18 6     6   23 use URI::Fetch;
  6         7  
  6         108  
19 6     6   19 use XML::Feed;
  6         6  
  6         6000  
20              
21             with 'MooseX::Traits';
22              
23             $XML::Atom::ForceUnicode = 1;
24              
25             has 'ua' => (
26             is => 'rw',
27             isa => 'LWP::UserAgent',
28             lazy_build => 1
29             );
30              
31             sub _build_ua {
32 4     4   6 my $self = shift;
33 4         45 my $ua = LWP::UserAgent->new(
34             agent => "Perlanet/$VERSION"
35             );
36 4 50       9213 $ua->show_progress(1) if -t STDOUT;
37 4         20 $ua->env_proxy;
38              
39 4         11339 return $ua;
40             }
41              
42             has 'cutoff' => (
43             isa => 'DateTime',
44             is => 'ro',
45             default => sub {
46             DateTime->now + DateTime::Duration->new(weeks => 1);
47             }
48             );
49              
50             has 'entries' => (
51             isa => 'Int',
52             is => 'rw',
53             default => 10,
54             );
55              
56             has 'entries_per_feed' => (
57             isa => 'Int',
58             is => 'rw',
59             default => 5,
60             );
61              
62             has 'feeds' => (
63             isa => 'ArrayRef',
64             is => 'ro',
65             default => sub { [] }
66             );
67              
68             has 'author' => (
69             isa => 'HashRef',
70             is => 'ro',
71             );
72              
73             has $_ => (
74             isa => 'Str',
75             is => 'ro',
76             ) for qw( self_link title description url agent );
77              
78             =head1 NAME
79              
80             Perlanet - A program for creating programs that aggregate web feeds (both
81             RSS and Atom).
82              
83             =head1 SYNOPSIS
84              
85             my $perlanet = Perlanet->new;
86             $perlanet->run;
87              
88             =head1 DESCRIPTION
89              
90             Perlanet is a program for creating programs that aggregate web feeds (both
91             RSS and Atom). Web pages like this are often called "Planets" after the Python
92             software which originally popularised them. Perlanet is a planet builder
93             written in Perl - hence "Perlanet".
94              
95             You are probably interested in L<Perlanet::Simple> to get started straight
96             out of the box, batteries included style.
97              
98             Perlanet itself is the driving force behind everything, however. Perlanet
99             reads a series of web feeds (filtering only those that are valid), sorts
100             and selects entries from these web feeds, and then creates a new aggregate
101             feed and renders this aggregate feed. Perlanet allows the user to customize
102             all of these steps through subclassing and roles.
103              
104             For most uses, you probably don't want to use the Perlanet module. The
105             L<perlanet> command line program is far more likely to be useful.
106              
107             =head1 CONSTRUCTOR
108              
109             =head2 new
110              
111             my $perlanet = Perlanet->new
112              
113             The constructor method. Can be passed a hashref of initialisers.
114              
115             See L</ATTRIBUTES> below for details of the key/value pairs to pass in.
116              
117             =head1 ATTRIBUTES
118              
119             =over
120              
121             =item ua
122              
123             An instance of L<LWP::UserAgent>. Defaults to a simple agent using C<<
124             $cfg->{agent} >> as the user agent name, or C< Perlanet/$VERSION >.
125              
126             =item cutoff
127              
128             An instance of L<DateTime> which represents the earliest date for
129             which feed posts will be fetched/shown.
130              
131             =item feeds
132              
133             An arrayref of L<Perlanet::Feed> objects representing the feeds to
134             collect data from.
135              
136             =back
137              
138             =head1 METHODS
139              
140             =head2 fetch_page
141              
142             Attempt to fetch a web page and a returns a L<URI::Fetch::Response> object.
143              
144             =cut
145              
146             sub fetch_page {
147 0     0 1 0 my ($self, $url) = @_;
148 0         0 return URI::Fetch->fetch(
149             $url,
150             UserAgent => $self->ua,
151             ForceResponse => 1,
152             );
153             }
154              
155             =head2 fetch_feeds
156              
157             Called internally by L</run> and passed the list of feeds in L</feeds>.
158              
159             Attempt to download all given feeds, as specified in the C<feeds> attribute.
160             Returns a list of L<Perlanet::Feed> objects, with the actual feed data
161             loaded.
162              
163             NB: This method also modifies the contents of L</feeds>.
164              
165             =cut
166              
167             sub fetch_feeds {
168 5     5 1 10 my ($self, @feeds) = @_;
169              
170 5         8 my @valid_feeds;
171 5         12 for my $feed (@feeds) {
172 6         1069 my $response = $self->fetch_page($feed->url);
173              
174 6 50       79749 if ($response->is_error) {
175 0         0 carp 'Error retrieving ' . $feed->url;
176 0         0 carp $response->http_response->status_line;
177 0         0 next;
178             }
179              
180 6 50       122 unless (length $response->content) {
181 0         0 carp 'No data returned from ' . $feed->url;
182 0         0 next;
183             }
184              
185             try {
186 6     6   153 my $data = $response->content;
187 6         74 my $xml_feed = XML::Feed->parse(\$data);
188              
189 6         603 $feed->_xml_feed($xml_feed);
190 0 0       0 $feed->title($xml_feed->title) unless $feed->title;
191              
192 0         0 push @valid_feeds, $feed;
193             }
194             catch {
195 6     6   141897 carp 'Errors parsing ' . $feed->url;
196 6 50       891 carp $_ if defined $_;
197 6         78 };
198             }
199              
200 5         4326 return @valid_feeds;
201             }
202              
203             =head2 select_entries
204              
205             Called internally by L</run> and passed the list of feeds from
206             L</fetch_feeds>.
207              
208             Returns a combined list of L<Perlanet::Entry> objects from all given feeds.
209              
210             =cut
211              
212             sub select_entries {
213 5     5 1 3091 my ($self, @feeds) = @_;
214              
215 5         9 my @feed_entries;
216 5         11 for my $feed (@feeds) {
217 0         0 my @entries = $feed->_xml_feed->entries;
218              
219 0 0 0     0 if ($self->entries_per_feed and @entries > $self->entries_per_feed) {
220 0         0 $#entries = $self->entries_per_feed - 1;
221             }
222              
223             push @feed_entries,
224             map {
225 0         0 $_->title($feed->title . ': ' . $_->title);
  0         0  
226              
227             # Problem with XML::Feed's conversion of RSS to Atom
228 0 0 0     0 if ($_->issued && ! $_->modified) {
229 0         0 $_->modified($_->issued);
230             }
231              
232             Perlanet::Entry->new(
233 0         0 _entry => $_,
234             feed => $feed
235             );
236             } @entries;
237             }
238              
239 5         11 return @feed_entries;
240             }
241              
242             =head2 sort_entries
243              
244             Called internally by L</run> and passed the list of entries from
245             L</select_entries>.
246              
247             Sort the given list of entries into created/modified order for aggregation,
248             and filters them if necessary.
249              
250             Takes a list of L<Perlanet::Entry>s, and returns an ordered list.
251              
252             =cut
253              
254             sub sort_entries {
255 3     3 1 2199 my ($self, @entries) = @_;
256 3         27 my $day_zero = DateTime->from_epoch(epoch => 0);
257              
258             @entries = grep {
259 0   0     0 ($_->issued || $_->modified || $day_zero) < $self->cutoff
260             } sort {
261 3   0     831 ($b->modified || $b->issued || $day_zero)
  0   0     0  
262             <=>
263             ($a->modified || $a->issued || $day_zero)
264             } @entries;
265              
266             # Only need so many entries
267 3 50 33     85 if ($self->entries && @entries > $self->entries) {
268 0         0 $#entries = $self->entries - 1;
269             }
270              
271 3         16 return @entries;
272             }
273              
274             =head2 build_feed
275              
276             Called internally by L</run> and passed the list of entries from
277             L</sort_entries>.
278              
279             Takes a list of L<Perlanet::Entry>s, and returns a L<Perlanet::Feed>
280             that is the actual feed for the planet.
281              
282             =cut
283              
284             sub build_feed {
285 3     3 1 2180 my ($self, @entries) = @_;
286              
287 3         83 my $self_url = $self->self_link;
288              
289 3         14 my $f = Perlanet::Feed->new( modified => DateTime->now );
290 3 50       72 $f->title($self->title) if defined $self->title;
291 3 50       67 $f->url($self->url) if defined $self->url;
292 3 50       67 $f->description($self->description) if defined $self->description;
293 3 50       67 $f->author($self->author->{name}) if defined $self->author->{name};
294 3 50       52 $f->email($self->author->{email}) if defined $self->author->{email};
295 3 50       53 $f->self_link($self->url) if defined $self->url;
296 3 50       52 $f->id($self->url) if defined $self->url;
297              
298 3         9 $f->add_entry($_) for @entries;
299              
300 3         8 return $f;
301             }
302              
303             =head2 clean_html
304              
305             Clean a HTML string so it is suitable for display.
306              
307             Takes a HTML string and returns a "cleaned" HTML string.
308              
309             =cut
310              
311             sub clean_html {
312 0     0 1 0 my ($self, $entry) = @_;
313 0         0 return $entry;
314             }
315              
316             =head2 clean_entries
317              
318             Clean all entries for the planet.
319              
320             Takes a list of entries, runs them through C<clean> and returns a list of
321             cleaned entries.
322              
323             =cut
324              
325             sub clean_entries {
326 3     3 1 2187 my ($self, @entries) = @_;
327              
328 3         6 my @clean_entries;
329              
330 3         6 foreach (@entries) {
331 0 0       0 if (my $body = $_->content->body) {
332 0         0 my $cleaned = $self->clean_html($body);
333 0         0 $_->content->body($cleaned);
334             }
335              
336 0 0       0 if (my $summary = $_->summary->body) {
337 0         0 my $cleaned = $self->clean_html($summary);
338 0         0 $_->summary->body($cleaned);
339             }
340              
341 0         0 push @clean_entries, $_;
342             }
343              
344 3         8 return @clean_entries;
345             }
346              
347             =head2 render
348              
349             Called internally by L</run> and passed the feed from L</build_feed>.
350              
351             This is the hook where you generate some type of page to display the result
352             of aggregating feeds together (ie, inserting the posts into a database,
353             running a HTML templating library, etc)
354              
355             Takes a L<Perlanet::Feed> as input (as generated by L<build_feed>.
356              
357             =cut
358              
359             sub render {
360 1     1 1 18 my ($self, $feed) = @_;
361             }
362              
363             =head2 run
364              
365             The main method which runs the perlanet process.
366              
367             =cut
368              
369             sub run {
370 1     1 1 9 my $self = shift;
371              
372 1         1 my @feeds = $self->fetch_feeds(@{$self->feeds});
  1         24  
373 1         9 my @selected = $self->select_entries(@feeds);
374 1         6 my @sorted = $self->sort_entries(@selected);
375 1         6 my @cleaned = $self->clean_entries(@sorted);
376 1         5 my $feed = $self->build_feed(@cleaned);
377              
378 1         5 $self->render($feed);
379             }
380              
381             =head1 TO DO
382              
383             See http://wiki.github.com/davorg/perlanet
384              
385             =head1 SUPPORT
386              
387             There is a mailing list which acts as both a place for developers to talk
388             about maintaining and improving Perlanet and also for users to get support.
389             You can sign up to this list at
390             L<http://lists.mag-sol.com/mailman/listinfo/perlanet>
391              
392             To report bugs in Perlanet, please use the CPAN request tracker. You can
393             either use the web page at
394             L<http://rt.cpan.org/Public/Bug/Report.html?Queue=Perlanet> or send an email
395             to bug-Perlanet@rt.cpan.org.
396              
397             =head1 SEE ALSO
398              
399             =over 4
400              
401             =item *
402              
403             L<perlanet>
404              
405             =item *
406              
407             L<Plagger>
408              
409             =back
410              
411             =head1 AUTHOR
412              
413             Dave Cross, <dave@mag-sol.com>
414              
415             =head1 COPYRIGHT AND LICENSE
416              
417             Copyright (c) 2010 by Magnum Solutions Ltd.
418              
419             This library is free software; you can redistribute it and/or modify
420             it under the same terms as Perl itself, either Perl version 5.10.0 or,
421             at your option, any later version of Perl 5 you may have available.
422              
423             =cut
424              
425             1;