File Coverage

blib/lib/WWW/Pastebin/PastebinCom/Retrieve.pm
Criterion Covered Total %
statement 43 64 67.1
branch 15 32 46.8
condition 18 51 35.2
subroutine 9 9 100.0
pod 1 1 100.0
total 86 157 54.7


line stmt bran cond sub pod time code
1             package WWW::Pastebin::PastebinCom::Retrieve;
2              
3 1     1   111465 use warnings;
  1         3  
  1         38  
4 1     1   5 use strict;
  1         2  
  1         49  
5              
6             our $VERSION = '0.002';
7              
8 1     1   7 use URI;
  1         5  
  1         21  
9 1     1   5 use HTML::TokeParser::Simple;
  1         2  
  1         32  
10 1     1   5 use HTML::Entities;
  1         2  
  1         88  
11 1     1   5 use base 'WWW::Pastebin::Base::Retrieve';
  1         2  
  1         1128  
12              
13             sub retrieve {
14 1     1 1 4589 my $self = shift;
15 1         2 my $id = shift;
16              
17 1         10 $self->$_(undef) for qw(error uri id results);
18            
19 1 50 33     29 return $self->_set_error('Missing or empty paste ID or URI')
20             unless defined $id and length $id;
21              
22 1 50       4 ( my $uri, $id ) = $self->_make_uri_and_id( $id, @_ )
23             or return;
24              
25 1         8514 $self->id( $id );
26 1         10 $self->uri( $uri );
27              
28 1         8 my $ua = $self->ua;
29 1         11 my $response = $ua->get( $uri );
30 1 50 33     360956 if (
31             $response->is_success
32             or $response->code == 404 # and just WHY they thought giving 404s
33             # on existing pastes is such a great idea?
34             ) {
35 1         40 return $self->_get_was_successful( $response->content );
36             }
37             else {
38 0         0 return $self->_set_error('Network error: ' . $response->status_line);
39             }
40             }
41              
42             sub _make_uri_and_id {
43 1     1   2 my ( $self, $what ) = @_;
44              
45 1         8 my ( $private, $id ) = $what =~ m{
46             (?:http://)?
47             (?:www\.)?
48             (.*?) # "private paste" subdomain
49             pastebin\.com/
50             (\w+) # paste ID
51             }xi;
52              
53 1 50 33     8 $id = $what
54             unless defined $id and length $id;
55              
56 1 50       5 $private = ''
57             unless defined $private;
58              
59 1         8 return ( URI->new("http://${private}pastebin.com/$id"), $id );
60             }
61              
62             sub _parse {
63 1     1   64 my ( $self, $content ) = @_;
64              
65             # yes, they could've given 200s on existing pastes and 404s on
66             # non-existant, but NO!! 404s for EVERYONE... yey \o/
67             # that calls for urgent parsing of HTML with regexen WEEEEEEE
68 1 50 33     16 $content =~ m|404 Not Found|
69             and $content !~ /
/
70             and return $self->_set_error('This paste does not seem to exist');
71              
72 1         11 my $parser = HTML::TokeParser::Simple->new( \$content );
73            
74 1         190 my ( %data, %nav );
75 1         8 @nav{ qw(level start get_name_date get_lang get_content) }
76             = (0) x 5;
77              
78 1         6 while ( my $t = $parser->get_token ) {
79 965 50 100     47067 if ( $t->is_start_tag('div')
    50 66        
    50 33        
    50 33        
    50 33        
    50 33        
    50 33        
    50 33        
      33        
      0        
      33        
      33        
80             and defined $t->get_attr('id')
81             and $t->get_attr('id') eq 'content'
82             ) {
83 0         0 @nav{ qw(level start) } = (1, 1);
84             }
85             elsif ( $nav{start} == 1 and $t->is_start_tag('h1') ) {
86 0         0 @nav{ qw(level get_name_date) } = (2, 1);
87             }
88             elsif ( $nav{get_name_date} == 1 and $t->is_text ) {
89 0         0 @data{ qw(name posted_on) } = $t->as_is
90             =~ /Posted by (.+) on (.+)/;
91              
92 0         0 @nav{ qw(level get_name_date) } = (3, 0);
93             }
94             elsif ( $nav{start} == 1
95             and $t->is_start_tag('option')
96             and defined $t->get_attr('selected')
97             ) {
98 0         0 @nav{ qw(level get_lang) } = (4, 1);
99             }
100             elsif ( $nav{get_lang} == 1 and $t->is_text ) {
101 0         0 @nav{ qw(level get_lang) } = (5, 0);
102 0         0 $data{lang} = $t->as_is;
103             }
104             elsif ( $nav{start} == 1
105             and $t->is_start_tag('textarea')
106             and defined $t->get_attr('id')
107             and $t->get_attr('id') eq 'code'
108             ) {
109 0         0 @nav{ qw(level get_content) } = (6, 1);
110             }
111             elsif ( $nav{get_content} == 1 and $t->is_text ) {
112 0         0 $data{content} = $t->as_is;
113 0         0 $nav{is_success} = 1;
114 0         0 last;
115             }
116             elsif ( $nav{get_content} == 1 and $t->is_end_tag('textarea') ) {
117 0         0 return $self->_set_error('This paste does not seem to exist');
118             }
119             }
120              
121 1 50       29 unless ( $nav{is_success} ) {
122 1         97 return $self->_set_error (
123             "Parser error (level $nav{level}).\n"
124             . "Failed on content:\n$content"
125             );
126             }
127              
128 0           for ( values %data ) {
129 0 0 0       unless ( defined and length ) {
130 0           $_ = 'N/A';
131 0           next;
132             }
133 0           decode_entities $_;
134 0           s/\240/ /g;
135             }
136              
137 0           $self->content( $data{content} );
138 0           return \%data;
139             }
140              
141              
142             =head1 NAME
143              
144             WWW::Pastebin::PastebinCom::Retrieve - retrieve pastes from http://pastebin.com/ website
145              
146             =head1 SYNOPSIS
147              
148             use strict;
149             use warnings;
150              
151             use lib '../lib';
152             use WWW::Pastebin::PastebinCom::Retrieve;
153              
154             die "Usage: perl retrieve.pl \n"
155             unless @ARGV;
156              
157             my $Paste = shift;
158              
159             my $paster = WWW::Pastebin::PastebinCom::Retrieve->new;
160              
161             my $results_ref = $paster->retrieve( $Paste )
162             or die $paster->error;
163              
164             printf "Paste content is:\n%s\nPasted by %s on %s\n",
165             @$results_ref{ qw(content name posted_on) };
166              
167             =head1 DESCRIPTION
168              
169             The module provides interface to retrieve pastes from
170             L website via Perl.
171              
172             =head1 CONSTRUCTOR
173              
174             =head2 C
175              
176             my $paster = WWW::Pastebin::PastebinCom::Retrieve->new;
177              
178             my $paster = WWW::Pastebin::PastebinCom::Retrieve->new(
179             timeout => 10,
180             );
181              
182             my $paster = WWW::Pastebin::PastebinCom::Retrieve->new(
183             ua => LWP::UserAgent->new(
184             timeout => 10,
185             agent => 'PasterUA',
186             ),
187             );
188              
189             Constructs and returns a brand new juicy
190             WWW::Pastebin::PastebinCom::Retrieve
191             object. Takes two arguments, both are I. Possible arguments are
192             as follows:
193              
194             =head3 C
195              
196             ->new( timeout => 10 );
197              
198             B. Specifies the C argument of L's
199             constructor, which is used for retrieving. B C<30> seconds.
200              
201             =head3 C
202              
203             ->new( ua => LWP::UserAgent->new( agent => 'Foos!' ) );
204              
205             B. If the C argument is not enough for your needs
206             of mutilating the L object used for retrieving, feel free
207             to specify the C argument which takes an L object
208             as a value. B the C argument to the constructor will
209             not do anything if you specify the C argument as well. B
210             plain boring default L object with C argument
211             set to whatever C's C
212             argument is
213             set to as well as C argument is set to mimic Firefox.
214              
215             =head1 METHODS
216              
217             =head2 C
218              
219             my $results_ref = $paster->retrieve('http://pastebin.com/f525c4cec')
220             or die $paster->error;
221              
222             my $results_ref = $paster->retrieve('f525c4cec')
223             or die $paster->error;
224              
225             Instructs the object to retrieve a paste specified in the argument. Takes
226             one mandatory argument which can be either a full URI to the paste you
227             want to retrieve or just its ID.
228             On failure returns either C or an empty list depending on the context
229             and the reason for the error will be available via C method.
230             On success returns a hashref with the following keys/values:
231              
232             $VAR1 = {
233             'lang' => 'Perl',
234             'posted_on' => 'Sat 22 Mar 16:07',
235             'content' => 'blah blah content of the paste',
236             'name' => 'Zoffix'
237             };
238              
239             =head3 content
240              
241             { 'content' => 'blah blah content of the paste', }
242              
243             The C key will contain the actual content of the paste. See also
244             C method which is overloaded for this class.
245              
246             =head3 lang
247              
248             { 'lang' => 'Perl' }
249              
250             The C key will contain the (computer) language of the paste
251             (as specified by the person who pasted it)
252              
253             =head3 posted_on
254              
255             { 'posted_on' => 'Sat 22 Mar 16:07', }
256              
257             The C key will contain the date/time when the paste was created.
258              
259             =head3 name
260              
261             { 'name' => 'Zoffix' }
262              
263             The C key will contain the name of the person who created the paste.
264              
265              
266             =head2 C
267              
268             $paster->retrieve('http://pastebin.com/f525c4cec')
269             or die $paster->error;
270              
271             On failure C returns either C or an empty list depending
272             on the context and the reason for the error will be available via C
273             method. Takes no arguments, returns an error message explaining the failure.
274              
275             =head2 C
276              
277             my $paste_id = $paster->id;
278              
279             Must be called after a successful call to C. Takes no arguments,
280             returns a paste ID number of the last retrieved paste irrelevant of whether
281             an ID or a URI was given to C
282              
283             =head2 C
284              
285             my $paste_uri = $paster->uri;
286              
287             Must be called after a successful call to C. Takes no arguments,
288             returns a L object with the URI pointing to the last retrieved paste
289             irrelevant of whether an ID or a URI was given to C
290              
291             =head2 C
292              
293             my $last_results_ref = $paster->results;
294              
295             Must be called after a successful call to C. Takes no arguments,
296             returns the exact same hashref the last call to C returned.
297             See C method for more information.
298              
299             =head2 C
300              
301             my $paste_content = $paster->content;
302              
303             print "Paste content is:\n$paster\n";
304              
305             Must be called after a successful call to C. Takes no arguments,
306             returns the actual content of the paste. B this method is overloaded
307             for this module for interpolation. Thus you can simply interpolate the
308             object in a string to get the contents of the paste.
309              
310             =head2 C
311              
312             my $old_LWP_UA_obj = $paster->ua;
313              
314             $paster->ua( LWP::UserAgent->new( timeout => 10, agent => 'foos' );
315              
316             Returns a currently used L object used for retrieving
317             pastes. Takes one optional argument which must be an L
318             object, and the object you specify will be used in any subsequent calls
319             to C.
320              
321             =head1 SEE ALSO
322              
323             L, L
324              
325             =head1 AUTHOR
326              
327             Zoffix Znet, C<< >>
328             (L, L)
329              
330             =head1 BUGS
331              
332             Please report any bugs or feature requests to C, or through
333             the web interface at L. I will be notified, and then you'll
334             automatically be notified of progress on your bug as I make changes.
335              
336             =head1 SUPPORT
337              
338             You can find documentation for this module with the perldoc command.
339              
340             perldoc WWW::Pastebin::PastebinCom::Retrieve
341              
342             You can also look for information at:
343              
344             =over 4
345              
346             =item * RT: CPAN's request tracker
347              
348             L
349              
350             =item * AnnoCPAN: Annotated CPAN documentation
351              
352             L
353              
354             =item * CPAN Ratings
355              
356             L
357              
358             =item * Search CPAN
359              
360             L
361              
362             =back
363              
364             =head1 COPYRIGHT & LICENSE
365              
366             Copyright 2008 Zoffix Znet, all rights reserved.
367              
368             This program is free software; you can redistribute it and/or modify it
369             under the same terms as Perl itself.
370              
371             =cut
372