File Coverage

blib/lib/WWW/Scraper/ISBN/ISBNdb_Driver.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package WWW::Scraper::ISBN::ISBNdb_Driver;
2              
3 7     7   142918 use strict;
  7         15  
  7         247  
4 7     7   31 use warnings;
  7         10  
  7         299  
5              
6             our $VERSION = '0.11';
7              
8             #--------------------------------------------------------------------------
9              
10             ###########################################################################
11             # Inheritence
12              
13 7     7   32 use base qw(WWW::Scraper::ISBN::Driver);
  7         38  
  7         3263  
14              
15             ###########################################################################
16             # Modules
17              
18 7     7   8134 use IO::File;
  7         48493  
  7         847  
19 7     7   4909 use LWP::UserAgent;
  7         266473  
  7         247  
20 7     7   5851 use XML::LibXML;
  0            
  0            
21             use Carp;
22              
23             ###########################################################################
24             # Variables
25              
26             our $ACCESS_KEY = undef;
27             our $user_agent = new LWP::UserAgent();
28              
29             my $API_VERSION = 'v1';
30             my $IN2MM = 0.0393700787; # number of inches in a millimetre (mm)
31             my $LB2G = 0.00220462; # number of pounds (lbs) in a gram
32              
33             my %editions = (
34             '(pbk.)' => 'Paperback',
35             '(electronic bk.)' => 'eBook'
36             );
37              
38             my %api_paths = (
39             'v1' => { format => 'http://isbndb.com/api/%s.xml?access_key=%s&index1=%s&results=%s&value1=%s', fields => [ qw( search_type access_key search_field results_type search_param ) ] },
40             'v2' => { format => 'http://isbndb.com/api/v2/xml/%s/%s/%s', fields => [ qw( access_key search_type search_param ) ] }
41             );
42              
43             #--------------------------------------------------------------------------
44              
45             =head1 NAME
46              
47             WWW::Scraper::ISBN::ISBNdb_Driver - Search driver for the isbndb.com online book catalog
48              
49             =head1 SYNOPSIS
50              
51             use WWW::Scraper::ISBN;
52             my $scraper = new WWW::Scraper::ISBN();
53             $scraper->drivers( qw/ ISBNdb / );
54             $WWW::Scraper::ISBN::ISBNdb_Driver::ACCESS_KEY = 'xxxx'; # Your isbndb.com access key
55              
56             my $isbn = '0596101058';
57             my $result = $scraper->search( $isbn );
58              
59             if( $result->found ) {
60             my $book = $result->book;
61             print "ISBN: ", $book->{isbn}, "\n";
62             print "Title: ", $book->{title}, "\n";
63             print "Author: ", $book->{author}, "\n";
64             print "Publisher: ", $book->{publisher}, "\n";
65             }
66              
67             =head1 DESCRIPTION
68              
69             This is a WWW::Scraper::ISBN driver that pulls data from
70             L. Consult L for usage
71             details.
72              
73             =cut
74              
75             #--------------------------------------------------------------------------
76              
77             ###########################################################################
78             # Public Interface
79              
80             sub search {
81             my( $self, $isbn ) = @_;
82             $self->found(0);
83             $self->book(undef);
84              
85             my %book;
86             if($API_VERSION eq 'v1') {
87             my( $details, $details_url ) = $self->_fetch( 'books', 'isbn' => $isbn, 'details' );
88             my( $authors, $authors_url ) = $self->_fetch( 'books', 'isbn' => $isbn, 'authors' );
89              
90             return unless $details && $self->_contains_book_data($details);
91              
92             %book = (
93             book_link => $details_url,
94             );
95              
96             $self->_get_pubdata(\%book,$details);
97             $self->_get_details(\%book,$details);
98             $self->_get_authors(\%book,$authors);
99             } else {
100             my( $details, $details_url ) = $self->_fetch( 'book', 'isbn' => $isbn, 'details' );
101              
102             return unless $details && $self->_contains_book_data($details);
103              
104             %book = (
105             book_link => $details_url,
106             );
107              
108             $self->_get_details_v2(\%book,$details);
109             }
110              
111             $self->_trim(\%book);
112              
113             $self->book(\%book);
114             $self->found(1);
115             return $self->book;
116             }
117              
118             ###########################################################################
119             # Private Interface
120              
121             sub _contains_book_data {
122             my( $self, $doc ) = @_;
123             return $doc->getElementsByTagName('BookData')->size > 0 if($API_VERSION eq 'v1');
124             return $doc->getElementsByTagName('data')->size > 0;
125             }
126              
127             #
128             #
129             #
130             #Learning Perl
131             #
132             #Randal L. Schwartz, Tom Phoenix and brian d foy
133             #Sebastopol, CA : O\'Reilly, c2005.
134             #
135             #Schwartz, Randal L.
136             #Tom Phoenix
137             #brian d foy
138             #
139             #
140             #
141             #
142              
143             sub _get_authors {
144             my( $self, $book, $authors ) = @_;
145             my $people = $authors->findnodes('//Authors/Person');
146             my @people;
147             for( my $i = 0; $i < $people->size; $i++ ) {
148             my $person = $people->get_node($i);
149             push @people, $person->to_literal;
150             }
151              
152             my $str = join '; ', @people;
153             $str =~ s/^\s+//;
154             $str =~ s/\s+$//;
155              
156             $book->{author} = $str;
157             }
158              
159             sub _get_pubdata {
160             my( $self, $book, $doc ) = @_;
161              
162             my $pubtext = $doc->findnodes('//PublisherText')->to_literal;
163             my $details = $doc->findnodes('//Details/@edition_info')->to_literal;
164              
165             my $year = '';
166             if( $pubtext =~ /(\d{4})/ ) { $year = $1 }
167             elsif( $details =~ /(\d{4})/ ) { $year = $1 }
168              
169             $book->{pubdate} = $year || '';
170             $book->{publisher} = '';
171              
172             my $pub_id = ($doc->findnodes('//PublisherText/@publisher_id'))[0]->to_literal;
173              
174             if($pub_id) {
175             my $publisher = $self->_fetch( 'publishers', 'publisher_id', $pub_id, 'details' );
176             my $data = ($publisher->findnodes('//PublisherData'))[0];
177              
178             $book->{publisher} = ($data->findnodes('//Name'))[0]->to_literal;
179             }
180             }
181              
182             #
183             #
184             #
185             #Learning Perl
186             #
187             #Randal L. Schwartz, Tom Phoenix and brian d foy
188             #Sebastopol, CA : O\'Reilly, c2005.
189             #
190             #
191             #
192             #
193              
194             sub _get_details {
195             my( $self, $book, $doc ) = @_;
196              
197             my $isbn10 = $doc->findnodes('//BookData/@isbn')->to_literal;
198             my $isbn13 = $doc->findnodes('//BookData/@isbn13')->to_literal;
199              
200             $book->{isbn} = $isbn13;
201             $book->{ean13} = $isbn13;
202             $book->{isbn13} = $isbn13;
203             $book->{isbn10} = $isbn10;
204              
205             my $long_title = eval { ($doc->findnodes('//TitleLong'))[0]->to_literal };
206             my $short_title = eval { ($doc->findnodes('//Title'))[0]->to_literal };
207             $book->{title} = $long_title || $short_title;
208              
209             my $edition = $doc->findnodes('//Details/@edition_info')->to_literal;
210             my $desc = $doc->findnodes('//Details/@physical_description_text')->to_literal;
211             my $dewey = $doc->findnodes('//Details/@dewey_decimal')->to_literal;
212              
213             my ($binding,$date) = $edition =~ /([^;]+);(.*)/;
214             my (@size) = $desc =~ /([\d\.]+)"x([\d\.]+)"x([\d\.]+)"/;
215             my ($weight) = $desc =~ /([\d\.]+) lbs?/;
216             my ($pages) = $desc =~ /(\d) pages/;
217             ($pages) = $desc =~ /(\d+) p\./ unless($pages);
218              
219             my ($height,$width,$depth) = sort {$b <=> $a} @size;
220              
221             $book->{height} = int($height / $IN2MM) if($height);
222             $book->{width} = int($width / $IN2MM) if($width);
223             $book->{depth} = int($depth / $IN2MM) if($depth);
224             $book->{weight} = int($weight / $LB2G) if($weight);
225             $book->{pubdate} = $date if($date);
226             $book->{binding} = $editions{$edition} || $binding || $edition;
227             $book->{pages} = $pages;
228             $book->{dewey} = "$dewey";
229             }
230              
231             #
232             #
233             #
234             # schwartz_randal_l
235             # Schwartz, Randal L.
236             #
237             #
238             # tom_phoenix
239             # Tom Phoenix
240             #
241             #
242             # brian_d_foy
243             # brian d foy
244             #
245             #
246             # learning_perl_a04
247             # 005
248             # 5
249             # Paperback; 2005-07-01
250             # 1600330207
251             # 9781600330209
252             #
253             #
254             # ~
255             #
256             # 7.0"x9.2"x0.9"; 1.3 lb; 704 pages
257             # oreilly_media
258             # O\'Reilly Media
259             # O\'Reilly Media
260             # computers_internet_programming_introductory_beginning_genera
261             # computers_internet_programming_languages_tools_general
262             # computers_internet_programming_general
263             # computers_internet_web_development_programming_general
264             # "Learning Perl, better known as "the Llama book," starts the programmer on the way to mastery. Written by three prominent members of the Perl community who each have several years of experience teaching Perl around the world, this edition has been updated to account for all the recent changes to the language up to Perl 5.8. Perl is the language for people who want to get work done. It started as a tool for Unix system administrators who needed something powerful for small tasks. Since then, Perl has blossomed into a full-featured programming language used for web programming, database manipulation, XML processing, and system administration--on practically all platforms--while remaining the favorite tool for the small daily tasks it was designed for. You might start using Perl because you need it, but you\'ll continue to use it because you love it. Informed by their years of success at teaching Perl as consultants, the authors have re-engineered the Llama to better match the pace and scope appropriate for readers getting started with Perl, while retaining the detailed discussion, thorough examples, and eclectic wit for which the Llama is famous. The book includes new exercises and solutions so you can practice what you\'ve learned while it\'s still fresh in your mind. Here are just some of the topics covered: Perl variable types subroutines file operations regular expressions text processing strings and sorting process management using third party modules If you ask Perl programmers today what book they relied on most when they were learning Perl, you\'ll find that an overwhelming majority will point to the Llama. With good reason. Other books mayteach you to program in Perl, but this book will turn you into a Perl programmer.
265             # Learning Perl
266             # Learning Perl
267             # Learning Perl (4th Edition)
268             #
269             #
270             # isbn
271             #
272              
273             sub _get_details_v2 {
274             my( $self, $book, $doc ) = @_;
275              
276             my $people = $doc->findnodes('//data/author_data/name');
277             my @people;
278             for( my $i = 0; $i < $people->size; $i++ ) {
279             my $person = $people->get_node($i);
280             push @people, $person->to_literal;
281             }
282              
283             my $str = join '; ', @people;
284             $str =~ s/^\s+//;
285             $str =~ s/\s+$//;
286              
287             $book->{author} = $str;
288            
289             $book->{publisher} = ($doc->findnodes('//data/publisher_name'))[0]->to_literal;
290              
291             my $isbn10 = $doc->findnodes('//data/isbn10')->to_literal;
292             my $isbn13 = $doc->findnodes('//data/isbn13')->to_literal;
293              
294             $book->{isbn} = $isbn13;
295             $book->{ean13} = $isbn13;
296             $book->{isbn13} = $isbn13;
297             $book->{isbn10} = $isbn10;
298              
299             my $long_title = eval { ($doc->findnodes('//data/title_long'))[0]->to_literal };
300             my $short_title = eval { ($doc->findnodes('//data/title'))[0]->to_literal };
301             $book->{title} = $long_title || $short_title;
302              
303             my $pubtext = $doc->findnodes('//data/publisher_text')->to_literal;
304             my $edition = $doc->findnodes('//data/edition_info')->to_literal;
305             my $desc = $doc->findnodes('//data/physical_description_text')->to_literal;
306             my $dewey = $doc->findnodes('//data/dewey_decimal')->to_literal;
307             my $summary = $doc->findnodes('//data/summary')->to_literal;
308              
309             my ($binding,$date) = $edition =~ /([^;]+);(.*)/;
310             my (@size) = $desc =~ /([\d\.]+)"x([\d\.]+)"x([\d\.]+)"/;
311             my ($weight) = $desc =~ /([\d\.]+) lbs?/;
312             my ($pages) = $desc =~ /(\d+) pages/;
313             ($pages) = $desc =~ /(\d+) p\./ unless($pages);
314              
315             if( !$date && $pubtext =~ /(\d{4})/ ) { $date = $1 }
316             if( !$date && $edition =~ /(\d{4})/ ) { $date = $1 }
317              
318             my ($height,$width,$depth) = sort {$b <=> $a} @size;
319              
320             $book->{height} = int($height / $IN2MM) if($height);
321             $book->{width} = int($width / $IN2MM) if($width);
322             $book->{depth} = int($depth / $IN2MM) if($depth);
323             $book->{weight} = int($weight / $LB2G) if($weight);
324             $book->{pubdate} = $date if($date);
325             $book->{binding} = $editions{$edition} || $binding || $edition;
326             $book->{pages} = $pages;
327             $book->{dewey} = "$dewey";
328              
329             $book->{description} = $summary;
330             }
331              
332             #--------------------------------------------------------------------------
333              
334             sub _trim {
335             my( $self, $book ) = @_;
336              
337             for my $key (keys %$book) {
338             next unless($book->{$key});
339             $book->{$key} =~ s/^\s+//s; # remove leading spaces
340             $book->{$key} =~ s/\s+$//s; # remove trailing spaces
341             }
342             }
343              
344             sub _fetch {
345             my( $self, @args ) = @_;
346             my $parser = new XML::LibXML();
347             my $url = $self->_url( @args );
348             my $xml = $self->_fetch_data($url);
349             return unless($xml && $xml !~ /^/);
350              
351             my $doc = $parser->parse_string( $xml );
352             return wantarray ? ( $doc, $url ) : $doc;
353             }
354              
355             sub _fetch_data {
356             my( $self, $url ) = @_;
357             my $res = $user_agent->get($url);
358             return unless $res->is_success;
359             # use Data::Dumper;
360             # print STDERR "# data=" . Dumper($res);
361             return $res->content;
362             }
363              
364             sub _url {
365             my $self = shift;
366              
367             my $access_key = $self->_get_key();
368             croak "no access key provided" unless $access_key;
369              
370             my %hash = ( access_key => $access_key );
371             ($hash{search_type}, $hash{search_field}, $hash{search_param}, $hash{results_type}) = @_;
372              
373             my @values = map { $hash{$_} } @{ $api_paths{$API_VERSION}{fields} };
374             my $url = sprintf $api_paths{$API_VERSION}{format}, @values;
375              
376             # print STDERR "# url=$url\n";
377             return $url;
378             }
379              
380             sub _get_key {
381             return $ACCESS_KEY if($ACCESS_KEY);
382              
383             if($ENV{ISBNDB_ACCESS_KEY}) {
384             $ACCESS_KEY = $ENV{ISBNDB_ACCESS_KEY};
385             return $ACCESS_KEY;
386             }
387              
388             for my $dir ( ".", $ENV{HOME}, '~' ) {
389             my $file = join( '/', $dir, ".isbndb" );
390             next unless -e $file;
391              
392             my $fh = IO::File->new($file,'r') or next;
393             my $key;
394             $key .= $_ while(<$fh>);
395             $key =~ s/\s+//gs;
396             $fh->close;
397              
398             $ACCESS_KEY = $key;
399             return $ACCESS_KEY;
400             }
401             }
402              
403             sub _api_version {
404             my $version = shift;
405             $API_VERSION = $version if($version && $api_paths{$version});
406             return $API_VERSION;
407             }
408              
409             1;
410              
411             __END__