File Coverage

blib/lib/CPAN/WWW/Top100/Retrieve.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             # Declare our package
2             package CPAN::WWW::Top100::Retrieve;
3 1     1   1402 use strict; use warnings;
  1     1   1  
  1         29  
  1         5  
  1         2  
  1         26  
4              
5             # Initialize our version
6 1     1   4 use vars qw( $VERSION );
  1         1  
  1         39  
7             $VERSION = '0.01';
8              
9             # import the Moose stuff
10 1     1   381 use Moose;
  0            
  0            
11             use MooseX::StrictConstructor;
12             use Moose::Util::TypeConstraints;
13             use Params::Coerce;
14             use namespace::autoclean;
15              
16             # get some utility stuff
17             use LWP::UserAgent;
18             use URI;
19             use HTML::TableExtract;
20              
21             use CPAN::WWW::Top100::Retrieve::Dist;
22             use CPAN::WWW::Top100::Retrieve::Utils qw( default_top100_uri dbids type2dbid dbid2type );
23              
24             has 'debug' => (
25             isa => 'Bool',
26             is => 'rw',
27             default => sub { 0 },
28             );
29              
30             has 'ua' => (
31             isa => 'LWP::UserAgent',
32             is => 'rw',
33             required => 0,
34             lazy => 1,
35             default => sub {
36             LWP::UserAgent->new;
37             },
38             );
39              
40             has 'error' => (
41             isa => 'Str',
42             is => 'ro',
43             writer => '_error',
44             );
45              
46             # Taken from Moose::Cookbook::Basics::Recipe5
47             subtype 'My::Types::URI' => as class_type('URI');
48              
49             coerce 'My::Types::URI'
50             => from 'Object'
51             => via {
52             $_->isa('URI') ? $_ : Params::Coerce::coerce( 'URI', $_ );
53             }
54             => from 'Str'
55             => via {
56             URI->new( $_, 'http' )
57             };
58              
59             has 'uri' => (
60             isa => 'My::Types::URI',
61             is => 'rw',
62             required => 0,
63             lazy => 1,
64             default => sub {
65             default_top100_uri();
66             },
67             coerce => 1,
68             );
69              
70             has '_data' => (
71             isa => 'HashRef',
72             is => 'ro',
73             default => sub { {} },
74             );
75              
76             sub _retrieve {
77             my $self = shift;
78              
79             # Do we already have data?
80             if ( keys %{ $self->_data } > 0 ) {
81             warn "Using cached data" if $self->debug;
82             return 1;
83             } else {
84             warn "Starting retrieve run" if $self->debug;
85             }
86              
87             # Okay, get the data via LWP
88             warn "LWP->get( " . $self->uri . " )" if $self->debug;
89             my $response = $self->ua->get( $self->uri );
90             if ( $response->is_error ) {
91             my $errstr = "LWP Error: " . $response->status_line . "\n" . $response->content;
92             $self->_error( $errstr );
93             warn $errstr if $self->debug;
94             return 0;
95             }
96              
97             # Parse it!
98             return $self->_parse( $response->content );
99             }
100              
101             sub _parse {
102             my $self = shift;
103             my $content = shift;
104              
105             # Get the tables!
106             foreach my $dbid ( sort { $a <=> $b } @{ dbids() } ) {
107             warn "Parsing dbid $dbid..." if $self->debug;
108              
109             my $table_error;
110             my $table = HTML::TableExtract->new( attribs => { id => "ds$dbid" }, error_handle => \$table_error );
111             $table->parse( $content );
112              
113             if ( ! $table->tables ) {
114             my $errstr = "Unable to parse table $dbid";
115             $errstr .= " $table_error" if length $table_error;
116             $self->_error( $errstr );
117             warn $errstr if $self->debug;
118             return 0;
119             }
120              
121             foreach my $ts ( $table->tables ) {
122             # Store it in our data struct!
123             my %cols;
124             foreach my $row ( $ts->rows ) {
125             if ( ! keys %cols ) {
126             # First row, the headers!
127             my $c = 0;
128             %cols = map { $_ => $c++ } @$row;
129             } else {
130             # Make the object!
131             my $obj = CPAN::WWW::Top100::Retrieve::Dist->new(
132             ## no critic ( ProhibitAccessOfPrivateData )
133             'dbid' => $dbid,
134             'type' => dbid2type( $dbid ),
135             'rank' => $row->[ $cols{ 'Rank' } ],
136             'author' => $row->[ $cols{ 'Author' } ],
137             'dist' => $row->[ $cols{ 'Distribution' } ],
138              
139             # ugly logic here, but needed to "collate" the different report types
140             'score' => ( exists $cols{ 'Dependencies' } ? $row->[ $cols{ 'Dependencies' } ] :
141             ( exists $cols{ 'Dependents' } ? $row->[ $cols{ 'Dependents' } ] :
142             ( exists $cols{ 'Score' } ? $row->[ $cols{ 'Score' } ] : undef ) ) ),
143             );
144              
145             push( @{ $self->_data->{ $dbid } }, $obj );
146             }
147             }
148             }
149             }
150              
151             return 1;
152             }
153              
154             sub list {
155             my $self = shift;
156             my $type = shift;
157              
158             return if ! defined $type or ! length $type;
159             $type = type2dbid( lc( $type ) );
160             return if ! defined $type;
161              
162             # if we haven't retrieved yet, do it!
163             return if ! $self->_retrieve;
164              
165             # Generate a copy of our data
166             my @r = ( @{ $self->_data->{ $type } } );
167             return \@r;
168             }
169              
170             # from Moose::Manual::BestPractices
171             no Moose;
172             __PACKAGE__->meta->make_immutable;
173              
174             1;
175             __END__
176              
177             =for stopwords Top100 AnnoCPAN CPANTS Kwalitee RT com diff dists github ua uri
178              
179             =head1 NAME
180              
181             CPAN::WWW::Top100::Retrieve - Retrieves the CPAN Top100 data from http://ali.as/top100
182              
183             =head1 SYNOPSIS
184              
185             #!/usr/bin/perl
186             use strict; use warnings;
187              
188             use CPAN::WWW::Top100::Retrieve;
189             use Data::Dumper;
190              
191             my $top100 = CPAN::WWW::Top100::Retrieve->new;
192             print Dumper( $top100->list( 'heavy' ) );
193              
194             =head1 DESCRIPTION
195              
196             This module retrieves the data from CPAN Top100 and returns it in a structured format.
197              
198             =head2 Constructor
199              
200             This module uses Moose, so you can pass either a hash or hashref to the constructor. The object will cache all
201             data relevant to the Top100 for as long as it's alive. If you want to get fresh data just make a new object and
202             use that.
203              
204             The attributes are:
205              
206             =head3 debug
207              
208             ( not required )
209              
210             A boolean value specifying debug warnings or not.
211              
212             =head3 ua
213              
214             ( not required )
215              
216             The LWP::UserAgent object to use in place of the default one.
217              
218             The default is:
219              
220             LWP::UserAgent->new;
221              
222             =head3 uri
223              
224             ( not required )
225              
226             The uri of Top100 data we should use to retrieve data in place of the default one.
227              
228             The default is:
229              
230             CPAN::WWW::Top100::Retrieve::Utils::default_top100_uri()
231              
232             =head2 Methods
233              
234             Currently, there is only one method: list(). You call this and get the arrayref of data back. For more
235             information please look at the L<CPAN::WWW::Top100::Retrieve::Dist> class. You can call list() as
236             many times as you want, no need to re-instantiate the object for each query.
237              
238             =head3 list
239              
240             Takes one argument: the $type of Top100 list and returns an arrayref of dists.
241              
242             WARNING: list() will return an empty list if errors happen. Please look at the error() method for the string.
243              
244             Example:
245              
246             use Data::Dumper;
247             print Dumper( $top100->list( 'heavy' ) );
248             print Dumper( $top100->list( 'volatile' ) );
249              
250             =head3 error
251              
252             Returns the error string if it was set, undef if not.
253              
254             =head1 SEE ALSO
255              
256             L<CPAN::WWW::Top100::Retrieve::Dist>
257              
258             L<CPAN::WWW::Top100::Retrieve::Utils>
259              
260             =head1 SUPPORT
261              
262             You can find documentation for this module with the perldoc command.
263              
264             perldoc CPAN::WWW::Top100::Retrieve
265              
266             =head2 Websites
267              
268             =over 4
269              
270             =item * Search CPAN
271              
272             L<http://search.cpan.org/dist/CPAN-WWW-Top100-Retrieve>
273              
274             =item * AnnoCPAN: Annotated CPAN documentation
275              
276             L<http://annocpan.org/dist/CPAN-WWW-Top100-Retrieve>
277              
278             =item * CPAN Ratings
279              
280             L<http://cpanratings.perl.org/d/CPAN-WWW-Top100-Retrieve>
281              
282             =item * CPAN Forum
283              
284             L<http://cpanforum.com/dist/CPAN-WWW-Top100-Retrieve>
285              
286             =item * RT: CPAN's Request Tracker
287              
288             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=CPAN-WWW-Top100-Retrieve>
289              
290             =item * CPANTS Kwalitee
291              
292             L<http://cpants.perl.org/dist/overview/CPAN-WWW-Top100-Retrieve>
293              
294             =item * CPAN Testers Results
295              
296             L<http://cpantesters.org/distro/C/CPAN-WWW-Top100-Retrieve.html>
297              
298             =item * CPAN Testers Matrix
299              
300             L<http://matrix.cpantesters.org/?dist=CPAN-WWW-Top100-Retrieve>
301              
302             =item * Git Source Code Repository
303              
304             This code is currently hosted on github.com under the account "apocalypse". Please feel free to browse it
305             and pull from it, or whatever. If you want to contribute patches, please send me a diff or prod me to pull
306             from your repository :)
307              
308             L<http://github.com/apocalypse/perl-cpan-www-top100-retrieve>
309              
310             =back
311              
312             =head2 Bugs
313              
314             Please report any bugs or feature requests to C<bug-cpan-www-top100-retrieve at rt.cpan.org>, or through
315             the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=CPAN-WWW-Top100-Retrieve>. I will be
316             notified, and then you'll automatically be notified of progress on your bug as I make changes.
317              
318             =head1 AUTHOR
319              
320             Apocalypse E<lt>apocal@cpan.orgE<gt>
321              
322             =head1 COPYRIGHT AND LICENSE
323              
324             Copyright 2010 by Apocalypse
325              
326             This library is free software; you can redistribute it and/or modify
327             it under the same terms as Perl itself.
328              
329             The full text of the license can be found in the LICENSE file included with this module.
330              
331             =cut