File Coverage

lib/Web/Scraper/Citations.pm
Criterion Covered Total %
statement 34 34 100.0
branch n/a
condition n/a
subroutine 11 11 100.0
pod 1 1 100.0
total 46 46 100.0


line stmt bran cond sub pod time code
1             package Web::Scraper::Citations;
2              
3 1     1   12469 use warnings;
  1         2  
  1         33  
4 1     1   4 use strict;
  1         1  
  1         20  
5 1     1   3 use Carp;
  1         3  
  1         56  
6 1     1   410 use utf8;
  1         6  
  1         2  
7              
8 1     1   331 use version; our $VERSION = qv('0.0.1');
  1         1094  
  1         4  
9              
10 1     1   449 use Mojo::UserAgent;
  1         198002  
  1         7  
11 1     1   24 use Mojo::DOM;
  1         1  
  1         12  
12 1     1   451 use Moose;
  1         280269  
  1         6  
13              
14             # Module implementation here
15             has 'id' => ( is => 'ro', isa => 'Str' );
16             has 'citations' => ( is => 'ro', isa => 'Int' );
17             has 'citations_last5' => ( is => 'ro', isa => 'Int' );
18             has 'h' => ( is => 'ro', isa => 'Int' );
19             has 'h_last5' => ( is => 'ro', isa => 'Int' );
20             has 'i10' => ( is => 'ro', isa => 'Int' );
21             has 'i10_last5' => ( is => 'ro', isa => 'Int' );
22             has 'name' => ( is => 'ro', isa => 'Str' );
23             has 'affiliation'=> ( is => 'ro', isa => 'Str' );
24              
25 1     1   4411 use constant STAT_NAMES => qw( citations citations_last5 h h_last5 i10 i10_last5 );
  1         1  
  1         283  
26              
27             # Mojo functions
28             around BUILDARGS => sub {
29             my $orig = shift;
30             my $class = shift;
31            
32             if ( @_ == 1 && !ref $_[0] ) {
33             my %object;
34             if ( $_[0] =~ /user=(\w+)/ ) {
35             $object{'id'} = $1;
36             } else {
37             $object{'id'} = $_[0];
38             }
39             my $url = Mojo::URL->new("http://scholar.google.com/citations?user=$object{'id'}");
40             my $ua = Mojo::UserAgent->new( max_redirects => 5 );
41             my $dom = $ua->get( $url )->res->dom or die "$! does not exist";
42             $object{'name'} = $dom->at("#gsc_prf_in")->text;
43             $object{'affiliation'} = $dom->at( ".gsc_prf_il" )->text;
44             my @dom_stats = $dom->find(".gsc_rsb_std")->map('text')->each;
45             for my $stat ( STAT_NAMES ) {
46             $object{$stat} = shift @dom_stats;
47             }
48            
49             return $class->$orig( %object );
50             }
51             else {
52             return $class->$orig(@_);
53             }
54             };
55              
56             sub profile_stats {
57 1     1 1 2 my $self = shift;
58 1         1 my %stats;
59 1         23 map( $stats{$_} = $self->$_, STAT_NAMES );
60 1         6 return \%stats;
61             }
62              
63              
64 1     1   5 no Moose;
  1         1  
  1         5  
65             __PACKAGE__->meta->make_immutable;
66            
67              
68             "To an infinite H and beyond"; # Magic true value required at end of module
69             __END__
70              
71             =head1 NAME
72              
73             Web::Scraper::Citations - Scrapes Google Scholar profiles for citations and stuff
74              
75             =head1 VERSION
76              
77             This document describes Web::Scraper::Citations version 0.0.1
78              
79             =head1 SYNOPSIS
80              
81             use Web::Scraper::Citations;
82              
83             my $author_profile = Web::Scraper::Citations->new( $url_or_author_id );
84              
85             say "This champ has got an h of ", $author_profile->h(), " and ", $author_profile->citations(), " citations";
86            
87            
88             =head1 DESCRIPTION
89              
90             This scraper downloads information from Google Scholar profiles at
91             L<http://scholar.google.com/citations>. Google limits scraping to
92             2500 requests a day, so use it sparingly and don't binge-scrape.
93              
94              
95             =head1 INTERFACE
96              
97             =head2 new ( $url_or_author_id )
98              
99             Creates a new object from a Google Scholar Citations profile URL or from an author ID (the C<user> parameter of that URL), downloading the profile page to fill in the attributes below.
100              
101             =head2 h, h_last5, citations, citations_last5, i10, i10_last5, name, affiliation
102              
103             Return the h-index, the citation count, and the i10-index (number of papers with at least 10
104             citations), either all-time or for the last 5 years (C<_last5>), plus the author's name and affiliation.
105              
106             =head2 profile_stats
107              
108             Returns a hashref with all stats above, names as keys.
109              
110             =head2 STAT_NAMES
111              
112             Constant with the names of the stats we are interested in.
113              
114             =head1 CONFIGURATION AND ENVIRONMENT
115              
116             Web::Scraper::Citations requires no configuration files or environment variables.
117              
118             =head1 BUGS AND LIMITATIONS
119              
120             No bugs have been reported.
121              
122             Please report any bugs or feature requests to
123             C<bug-web-scraper-citations@rt.cpan.org>, or through the web interface at
124             L<http://rt.cpan.org>.
125              
126              
127             =head1 AUTHOR
128              
129             JJ C<< <JMERELO@cpan.org> >>
130              
131              
132             =head1 LICENCE AND COPYRIGHT
133              
134             Copyright (c) 2015, JJ C<< <JMERELO@cpan.org> >>. All rights reserved.
135              
136             This module is free software; you can redistribute it and/or
137             modify it under the GPL v3.
138              
139              
140             =head1 DISCLAIMER OF WARRANTY
141              
142             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
143             FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
144             OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
145             PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
146             EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
147             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
148             ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
149             YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
150             NECESSARY SERVICING, REPAIR, OR CORRECTION.
151              
152             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
153             WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
154             REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
155             LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
156             OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
157             THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
158             RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
159             FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
160             SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
161             SUCH DAMAGES.