File Coverage

blib/lib/WWW/Spinn3r.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package WWW::Spinn3r;
2 2     2   78013 use base Class::Accessor;
  2         6  
  2         8298  
3 2     2   5263 use base WWW::Spinn3r::Common;
  2         5  
  2         993  
4 2     2   3913 use LWP::UserAgent;
  2         122121  
  2         75  
5 2     2   1327 use Data::Dumper;
  2         19887  
  2         183  
6 2     2   15 use Carp;
  2         5  
  2         121  
7 2     2   1138 use WWW::Spinn3r::next_request_url;
  0            
  0            
8             use WWW::Spinn3r::item;
9             use WWW::Spinn3r::link;
10             use File::Spec;
11              
# Generate plain get/set accessors for the object's fields.
__PACKAGE__->mk_accessors(
    qw(
        api
        api_url
        from_file
        future_sleep
        next_url
        retries
        retry_sleep
        last_url
        path
        this_cursor
        this_feed
        version
        want
    )
);

$WWW::Spinn3r::VERSION = '3.00700001';

# Default object fields; new() merges caller-supplied arguments over these.
our $DEFAULTS = {
    api_url      => 'http://api.spinn3r.com/rss',
    debug        => 0,
    retries      => 60 * 60 * 24 * 10,
    retry_sleep  => 3,
    future_sleep => 5,
    version      => '3.0.7',
    want         => 'item',
};
25              
26              
# Construct a WWW::Spinn3r object.
#
# Arguments are a flat key/value list that is merged over $DEFAULTS.
#
#   from_file => $path   Return immediately without creating an HTTP client.
#   params    => \%p     API parameters; $p{vendor} is required otherwise.
#   api       => $name   API name; required unless from_file is given.
#   mirror    => $bool   When false, gzip content encoding is requested.
#
# Croaks with "Need vendor key" or "Need api name" when the corresponding
# argument is missing in HTTP mode.
sub new {

    my ($class, %args) = @_;

    my $self = bless { %$DEFAULTS, %args }, $class;

    # File-backed objects never need a user agent.
    # TODO: check for the file's existence.
    return $self if $args{from_file};

    my $params = $args{params} || {};
    croak "Need vendor key" unless $params->{vendor};
    croak "Need api name"   unless $args{api};

    # Direct method call rather than indirect object syntax ("new Class ..."),
    # which is ambiguous to the parser.
    $self->{ua} = LWP::UserAgent->new(timeout => 30);

    unless ($args{mirror}) {
        $self->{ua}->default_header('Accept-Encoding' => 'gzip');
    }

    return $self;

}
48              
49              
# Alternative constructor for mirror mode.
#
# Takes the same key/value arguments as new() and additionally requires
# a true "path" argument; croaks with "no mirror path provided" otherwise.
# The object is built by new() with mirror => 1 appended to the arguments.
sub mirror {

    my ($class, %args) = @_;

    unless ($args{path}) {
        croak "no mirror path provided";
    }

    my $self = $class->new(%args, mirror => 1);
    return $self;

}
57              
58              
# Build the URL of the first API request.
#
# The URL is "<api_url>/<api>?version=<version>" followed by one
# "&name=value" pair for every entry in $self->{params}.
#
# If params contains a defined "version", it is moved into the version
# accessor (and removed from params) so it is not emitted twice.
#
# Parameters are appended in sorted key order so that the same parameter
# set always produces the same URL (hash iteration order is not stable
# between runs, and mirror filenames are derived from this URL).
#
# NOTE(review): names and values are concatenated as-is, without URL
# escaping - callers appear to be expected to pass URL-safe values.
sub first_url {

    my ($self) = @_;

    my $params = $self->{params} || {};

    # use default version if one is not provided.
    if (defined $params->{version}) {
        $self->version(delete $params->{version});
    }

    my $url = $self->api_url . '/' . $self->api . '?version=' . $self->version;
    for my $name (sort keys %$params) {
        $url .= '&' . $name . '=' . $params->{$name};
    }

    return $url;

}
77              
78              
# Fetch $url over HTTP, retrying on failure.
#
# Up to $self->retries attempts are made, stopping as soon as some content
# has been obtained. After a failed attempt the status line is logged; a
# status line starting with 4xx aborts immediately, anything else sleeps
# for $self->retry_sleep seconds before the next attempt.
#
# In mirror mode the response body is written to the file named by
# local_file() and that filename is returned. Otherwise a reference to
# the decoded response content is returned.
#
# Croaks with "Unable to fetch from spinn3r: <url>" if nothing was fetched.
sub _next_feed_from_http {

    my ($self, $url, %args) = @_;

    my $content  = '';
    my $attempts = 0;

    ATTEMPT:
    until ($attempts >= $self->retries or $content) {

        $attempts++;

        my ($response, $content_file, $length);

        my $start = $self->start_timer();
        if ($$self{mirror}) {
            $content_file = $self->local_file($$self{path}, $url);
            $self->debug("fetching (to file $content_file) $url");
            $response = $self->{ua}->get($url, ':content_file' => $content_file);
        }
        else {
            $self->debug("fetching (to memory) $url");
            $response = $self->{ua}->get($url);
        }
        my $howlong = $self->howlong($start);

        if ($response->is_success) {
            if ($$self{mirror}) {
                $length = -s $content_file;
                $self->debug("success! $length bytes, in $howlong seconds");
                $content = $content_file;
            }
            else {
                $length = length($response->content);
                $self->debug("success! $length bytes, in $howlong seconds");
                $content = $response->decoded_content;
            }
            next ATTEMPT;
        }

        # Failure path: log, give up on client errors, otherwise back off.
        $self->debug($response->status_line);
        last ATTEMPT if $response->status_line =~ /^4\d\d/;

        $self->debug("sleeping for " . $self->retry_sleep . " seconds...");
        sleep($self->retry_sleep);

    }

    croak "Unable to fetch from spinn3r: $url" unless $content;

    return $$self{mirror} ? $content : \$content;

}
134              
135              
# Derive the local mirror filename for an API URL.
#
# $path is the mirror directory and $url the API URL being fetched.
# The filename is "<url path without a leading /rss/>--<query>.xml",
# where every non-word character of the query string is replaced by "-".
# The result is joined onto $path with File::Spec->catfile, logged via
# debug(), and returned.
sub local_file {

    my ($self, $path, $url) = @_;

    # URI is used here but never imported by this file; load it explicitly.
    require URI;
    my $uri = URI->new($url);

    my $urlpath = $uri->path;
    $urlpath =~ s|^/rss/||;

    # A URL without a query string yields undef; treat it as empty.
    my $query = $uri->query;
    $query = '' unless defined $query;
    $query =~ s"\W"-"sg;

    # The double dash is kept so names match files written by earlier
    # versions (which prefixed the query with a space that became "-").
    my $filename = $urlpath . '--' . $query . '.xml';

    my $fullpath = File::Spec->catfile($path, $filename);
    $self->debug("mirror filename: $fullpath");
    return $fullpath;

}
152              
153              
# Load the next feed into $self->this_feed.
#
# HTTP mode ($self->{ua} is set):
#   The URL is next_url, or first_url() when next_url is not set.
#   * A URL ending in "before=" returns immediately without fetching.
#   * If the URL equals last_url, or ends in "after=", the method sleeps
#     for future_sleep seconds, clears last_url and tries again.
#   The fetched XML is parsed with WWW::Spinn3r::item or
#   WWW::Spinn3r::link depending on $self->want, and stored in this_feed.
#
# File mode ($self->{from_file} is set):
#   The same parsers are given the file path instead.
#
# Returns the value of the this_feed setter call when a feed was stored,
# and a false value otherwise.
sub next_feed {

    my ($self) = @_;

    if ($self->{ua}) {

        my $url;

        # Loop instead of recursing so a long wait does not grow the stack.
        while (1) {

            $url = $self->next_url || $self->first_url;

            # work around bug in permalink.history
            return if $url =~ /before=$/;

            my $last_url        = $self->last_url;
            my $already_fetched = defined $last_url && $url eq $last_url;
            last unless $already_fetched || $url =~ /after=$/;

            # NOTE(review): a URL ending in "after=" does not change between
            # iterations, so this waits indefinitely - same as the previous
            # recursive form. Confirm that is intended.
            $self->debug("it's the future! will wait for present to catch up. sleeping " . $self->future_sleep . " seconds");
            sleep($self->future_sleep);
            $self->last_url(undef);

        }

        my $xml = $self->_next_feed_from_http($url);

        $self->last_url($url);

        if ($self->want eq 'item') {
            my $items = WWW::Spinn3r::item->new(stringref => $xml, debug => $self->{debug});
            return unless $items;
            return $self->this_feed($items);
        }
        if ($self->want eq 'link') {
            return $self->this_feed(WWW::Spinn3r::link->new(stringref => $xml, debug => $self->{debug}));
        }
        return;

    }

    if ($self->{from_file}) {

        my $content_file = File::Spec->catfile($self->from_file);

        if ($self->want eq 'item') {
            return $self->this_feed(WWW::Spinn3r::item->new(path => $content_file, debug => $self->{debug}));
        }
        if ($self->want eq 'link') {
            return $self->this_feed(WWW::Spinn3r::link->new(path => $content_file, debug => $self->{debug}));
        }

    }

    return;

}
194              
195              
# Return the next item from the current feed, loading a new feed when the
# current one is exhausted.
#
# When no feed is loaded, next_feed() is called and the cursor is reset
# to 0; the feed's 'api:next_request_url' is stored in next_url for the
# following fetch.
#
# Returns:
#   * the item at the cursor position (and advances the cursor);
#   * undef when next_feed() did not produce a feed;
#   * an empty return when the new feed has no 'api:next_request_url';
#   * undef when a from_file feed has no more items.
#
# The distinct "return undef" / "return" forms are kept exactly as before
# so callers in list context see the same values.
sub next {

    my ($self) = @_;

    # Iterate instead of recursing: a run of empty feeds no longer
    # deepens the call stack.
    while (1) {

        unless ($self->this_feed) {

            $self->next_feed();
            return undef unless $self->this_feed;    # fetch failed
            $self->this_cursor(0);

            my $next_request_url = $self->this_feed->{'api:next_request_url'};
            return unless $next_request_url;
            $self->next_url($next_request_url);

        }

        my $item = $self->this_feed->{ $self->want }->[ $self->this_cursor ];

        if ($item) {
            $self->this_cursor($self->this_cursor + 1);
            return $item;
        }

        # Current feed is exhausted; drop it so the next pass fetches anew.
        $self->this_feed(undef);
        return undef if $self->from_file;

    }

}
224              
225              
# Mirror the next feed to a local file.
#
# Only valid on objects with $self->{mirror} set; otherwise warns
# "next_mirror called in non-mirror mode" and returns nothing.
#
# The URL is next_url, or first_url() when next_url is not set. If it
# equals last_url, or ends in "after=", the method sleeps for
# future_sleep seconds, clears last_url and tries again.
#
# The feed is saved by _next_feed_from_http(); the saved file is then
# parsed with WWW::Spinn3r::next_request_url and its
# 'api:next_request_url' value is stored in next_url.
#
# Returns the value of that next_url setter call.
sub next_mirror {

    my ($self, %args) = @_;

    unless ($self->{mirror}) {
        warn ("next_mirror called in non-mirror mode");
        return;
    }

    my $url;

    # Loop instead of recursing so a long wait does not grow the stack.
    while (1) {

        $url = $self->next_url || $self->first_url;

        my $last_url        = $self->last_url;
        my $already_fetched = defined $last_url && $url eq $last_url;
        last unless $already_fetched || $url =~ /after=$/;

        $self->debug("it's the future! will wait for present to catch up. sleeping " . $self->future_sleep . " seconds");
        sleep($self->future_sleep);
        $self->last_url(undef);

    }

    my $filename = $self->_next_feed_from_http($url);
    $self->last_url($url);

    # Direct method call rather than indirect object syntax.
    my $next_url = WWW::Spinn3r::next_request_url->new(path => $filename, debug => $self->{debug});
    return $self->next_url($next_url->{'api:next_request_url'});

}
249              
250            
251             1;
252              
253              
254             =head1 NAME
255            
256             WWW::Spinn3r - An interface to the Spinn3r API (http://www.spinn3r.com)
257              
258             =head1 SYNOPSIS
259              
260             use WWW::Spinn3r;
261             use DateTime;
262              
263             my $API = {
264             vendor => 'acme', # required
265             limit => 5,
266             lang => 'en',
267             tier => '0:5',
268             after => DateTime->now()->subtract(hours => 48),
269             };
270              
271             my $spnr = new WWW::Spinn3r (
272             api => 'permalink3.getDelta', params => $API, debug => 1,
273             );
274              
275             while(1) {
276             my $item = $spnr->next;
277             print $item->{title};
278             print $item->{link};
279             print $item->{dc}->{source};
280             print $item->{description};
281             }
282              
283             =head1 DESCRIPTION
284              
285             WWW::Spinn3r is an iterative interface to the Spinn3r API. The Spinn3r API
286             is implemented over REST and XML and documented at
287             C<http://www.spinn3r.com>.
288              
289             =head1 OBTAINING A VENDOR KEY
290              
291             Spinn3r service is available through a B<vendor> key, which you can
292             get from the good folks at Tailrank, C<http://www.spinn3r.com>.
293              
294             =head1 HOW TO USE
295              
296             Most commonly, you'll need just two functions from this module: C<new()>
297             and C<next()>. C<new()> creates a new instance of the API and C<next()>
298             returns the next item from the Spinn3r feed, as hashref. Details
299             are below.
300              
301             =head1 B<new()>
302              
303             The constructor. This function takes a hash with the following keys:
304              
305             =over 4
306              
307             =item B<api>
308              
309             C or C, one of the two APIs
310             provided by Spinn3r.
311              
312             =item B<params>
313              
314             These are parameters that are passed to the API call. See
315             C for a list of available parameters
316             and their values.
317              
318             The B<version> parameter to the API is a function of the version of this
319             module, and the B<version()> accessor method returns the version
320             of the API. By default, the version will be set to the version
321             that corresponds to this module.
322              
323             If the version of the spinn3r API has changed, you can specify it
324             as a parameter. While the module is not guaranteed to work with higher
325             versions of the Spinn3r API than it is designed for, it might if the
326             underlying formats and encodings have not changed.
327              
328             =item B<want>
329              
330             This parameter defines the type of item returned by the next() call.
331             WWW::Spinn3r uses XML::Twig to parse the XML returned by Spinn3r and
332             comes with three Twig parsers, C<WWW::Spinn3r::item>,
333             C<WWW::Spinn3r::link> and C<WWW::Spinn3r::next_request_url>. The default
334             value for C<want> is C<item>, which corresponds to the
335             C<WWW::Spinn3r::item> module and returns all fields for an item included
336             in the Spinn3r feed.
337              
338             The motivation for having multiple parsers is speed. If you only want
339             certain fields from the feed, for example the link and title, it is
340             significantly faster to write a parser that just extracts those two
341             fields from the feed with XML::Twig.
342              
343             =item B<debug>
344              
345             Emits debug noise on STDOUT if set to 1.
346              
347             =item B<retries>
348              
349             The maximum number of HTTP attempts in case of a 5xx failure from the API.
350             The default is 864000 (60 * 60 * 24 * 10).
351              
352             =back
353              
354             =head1 B<next()>
355              
356             This method returns the next item from the Spinn3r feed. The item is a
357             reference to a hash, which contains the various fields of an item
358             as parsed by the parser specified in the C<want> field of the
359             constructor (C<item> by default).
360              
361             The module transparently fetches a new set of results from Spinn3r,
362             using the C<api:next_request_url> returned by Spinn3r with every
363             request, and caches the result to implement C<next()>.
364              
365             You can control the number of results that are fetched with every call
366             by changing the C<limit> parameter at C<new()>.
367              
368             =head1 B<last_url()>
369              
370             The last API URL that was fetched.
371              
372             =head1 B<mirror()>
373              
374             WWW::Spinn3r supports mirroring of the Spinn3r feed to local files
375             and then recreating WWW::Spinn3r objects from these files. This
376             is useful if you want to distribute processing of the feeds
377             over multiple processes or computers.
378              
379             To mirror feeds to disk, use the alternative constructor B<mirror()>,
380             which takes all the same arguments as B<new()> plus the
381             C<path> argument, which specifies where the files should be saved.
382              
383             my $m = mirror WWW::Spinn3r ( path => $mirror_dir, ... )
384             $m->next_mirror();
385              
386             The iteration is done with the B<next_mirror()> method, which stores the
387             next feed to a new file, whose filename is derived from the API url.
388              
389             WWW::Spinn3r objects can be created from these disk files when
390             new() is called with the C<from_file> key:
391              
392             my $m = new WWW::Spinn3r ( from_file => ... );
393              
394             =head1 DATE STRING FORMAT
395              
396             Spinn3r supports ISO 8601 timestamps in the C<after> parameter. To
397             create ISO 8601 timestamps, use the DateTime module that returns ISO
398             8601 date strings by default. eg:
399              
400             after => DateTime->now()->subtract(hours => 48),
401             after => DateTime->now()->subtract(days => 31),
402              
403             =head1 REPORTING BUGS
404              
405             Bugs should be reported at C
406              
407             =head1 SEE ALSO
408              
409             WWW::Spinn3r::Synced
410              
411             =head1 AUTHOR
412              
413             Vipul Ved Prakash
414              
415             =head1 LICENSE
416              
417             This software is distributed under the same terms as perl itself.
418              
419             =cut