File Coverage

blib/lib/WWW/Scraper/ISBN/Yahoo_Driver.pm
Criterion Covered Total %
statement 37 72 51.3
branch 1 26 3.8
condition 6 42 14.2
subroutine 9 9 100.0
pod 1 1 100.0
total 54 150 36.0


!is; !is; !is;
line stmt bran cond sub pod time code
1             package WWW::Scraper::ISBN::Yahoo_Driver;
2              
3 6     6   103020 use strict;
  6         12  
  6         208  
4 6     6   23 use warnings;
  6         9  
  6         201  
5              
6 6     6   27 use vars qw($VERSION @ISA);
  6         11  
  6         421  
7             $VERSION = '0.23';
8              
9             #--------------------------------------------------------------------------
10              
11             =head1 NAME
12              
13             WWW::Scraper::ISBN::Yahoo_Driver - Search driver for Yahoo Books online catalog.
14              
15             =head1 SYNOPSIS
16              
17             See parent class documentation (L<WWW::Scraper::ISBN::Driver>)
18              
19             =head1 DESCRIPTION
20              
21             Searches for book information from the Yahoo Books online catalog.
22              
23             =cut
24              
25             #--------------------------------------------------------------------------
26              
27             ###########################################################################
28             # Inheritance
29              
30 6     6   24 use base qw(WWW::Scraper::ISBN::Driver);
  6         8  
  6         2683  
31              
32             ###########################################################################
33             # Modules
34              
35 6     6   4447 use WWW::Scraper::ISBN::Driver;
  6         10  
  6         115  
36 6     6   4296 use WWW::Mechanize;
  6         762771  
  6         263  
37              
38             ###########################################################################
39             # Constants
40              
41 6     6   52 use constant YAHOO => 'http://shopping.yahoo.com';
  6         8  
  6         377  
42 6     6   22 use constant SEARCH => 'https://shopping.yahoo.com/search?fr=yshoppingheader_test2&type=2button&did=0&p=';
  6         12  
  6         5021  
43              
44             #--------------------------------------------------------------------------
45              
46             ###########################################################################
47             # Public Interface
48              
49             =head1 METHODS
50              
51             =over 4
52              
53             =item C<search()>
54              
55             Creates a query string, then passes the appropriate form fields to the
56             Yahoo Books server.
57              
58             The returned page should be the correct catalog page for that ISBN. If not the
59             function returns zero and allows the next driver in the chain to have a go. If
60             a valid page is returned, the following fields are returned via the book hash:
61              
62             isbn (now returns isbn13)
63             isbn10 (no longer provided by Yahoo on page)
64             isbn13
65             ean13 (industry name)
66             title
67             author
68             pubdate (no longer provided by Yahoo on page)
69             publisher
70             book_link
71             image_link
72             thumb_link (same as image_link)
73             description
74             binding (if known)
75             pages (no longer provided by Yahoo on page)
76             weight (no longer provided by Yahoo on page)
77             width (no longer provided by Yahoo on page)
78             height (no longer provided by Yahoo on page)
79              
80             The book_link and image_link refer back to the Yahoo Books website.
81              
82             =back
83              
84             =cut
85              
86             sub search {
87 1     1 1 206 my $self = shift;
88 1         2 my $isbn = shift;
89 1         5 $self->found(0);
90 1         20 $self->book(undef);
91 1         7 my $data = {};
92              
93             # validate and convert into EAN13 format
94 1         5 my $ean = $self->convert_to_ean13($isbn);
95 1 50 33     40 return $self->handler("Invalid ISBN specified [$isbn]")
      33        
      33        
      33        
96             if(!$ean || (length $isbn == 13 && $isbn ne $ean)
97             || (length $isbn == 10 && $isbn ne $self->convert_to_isbn10($ean)));
98 1         28 $isbn = $ean;
99              
100 1         4 my $mech = WWW::Mechanize->new;
101 1         11492 $mech->agent_alias( 'Linux Mozilla' );
102              
103 1         71 eval { $mech->get( YAHOO . '/books' ) };
  1         4  
104 1 0 33     82284 return $self->handler("Yahoo! book website appears to be unavailable.")
      33        
105             if($@ || !$mech->success() || !$mech->content());
106              
107 0           eval { $mech->get( SEARCH . $isbn ) };
  0            
108 0 0 0       return $self->handler("Yahoo! book search website appears to be unavailable.")
      0        
109             if($@ || !$mech->success() || !$mech->content());
110              
111              
112             # The Results page
113 0           my $content = $mech->content();
114             #print STDERR "\n# results=[\n$content\n]\n";
115              
116 0           my ($link,$thumb) = $content =~ m!
117             #print STDERR "\n# link=[$link]\n# thumb=[$thumb]\n";
118              
119 0 0         return $self->handler("Failed to find that book on Yahoo! book website.")
120             unless(defined $link);
121              
122 0           $data->{book_link} = $link;
123              
124 0           eval { $mech->get( $data->{book_link} ) };
  0            
125 0 0 0       return $self->handler("Yahoo! book search website appears to be unavailable.")
      0        
126             if($@ || !$mech->success() || !$mech->content());
127              
128              
129             # The Book page
130 0           my $html = $mech->content();
131             #print STDERR "\n# page=[\n$html\n]\n";
132              
133 0 0         return $self->handler("Could not extract data from Yahoo! result page.")
134             unless($html =~ m!\s*!);
135              
136 0           ($data->{image_link}) = $html =~ m!!is;
137 0   0       $data->{image_link} ||= $thumb;
138              
139 0           ($data->{title}) = $html =~ m!]*>([^<]*)!is;
140 0 0         ($data->{title}) = $html =~ m!!is unless($data->{title});
141              
142 0           ($data->{description}) = $html =~ m!
(.*?)
!is;
143 0 0         ($data->{description}) = $html =~ m!!is unless($data->{description});
144              
145 0           ($data->{publisher}) = $html =~ m!Publisher([^<]+)
146 0           ($data->{binding}) = $html =~ m!Book Format([^<]+)
147 0           ($data->{author}) = $html =~ m!Author([^<]+)
148              
149 0 0 0       ($data->{publisher},$data->{pubdate})
150             = $data->{description} =~ m!This book is written by (?:[\s\w]+) Published by ([\s\w]+) In (\d+) and!is if(!$data->{publisher} && $data->{description});
151 0 0 0       ($data->{binding}) = $data->{description} =~ m!This book is written by (?:[\s\w]+) Published by (?:[\s\w]+) In (?:\d+) and is available in ([\w]+)!is
152             if(!$data->{binding} && $data->{description});
153 0 0 0       ($data->{author}) = $data->{description} =~ m!This book is written by ([\s\w]+) Published by!is if(!$data->{author} && $data->{description});
154              
155 0           ($data->{isbn13}) = $ean;
156 0           ($data->{isbn10}) = $self->convert_to_isbn10($ean);
157              
158 0 0         return $self->handler("Could not extract data from Yahoo! result page.")
159             unless(defined $data);
160              
161             # trim top and tail
162 0 0         foreach (keys %$data) { next unless(defined $data->{$_});$data->{$_} =~ s/^\s+//;$data->{$_} =~ s/\s+$//; }
  0            
  0            
  0            
163              
164 0           my $bk = {
165             'isbn13' => $data->{isbn13},
166             'isbn10' => $data->{isbn10},
167             'isbn' => $data->{isbn13},
168             'ean13' => $data->{isbn13},
169             'author' => $data->{author},
170             'title' => $data->{title},
171             'book_link' => $data->{book_link},
172             'image_link' => $thumb,
173             'thumb_link' => $thumb,
174             'description' => $data->{description},
175             'publisher' => $data->{publisher},
176             'pubdate' => $data->{pubdate},
177             'binding' => $data->{binding},
178             'pages' => $data->{pages},
179             'weight' => $data->{weight},
180             'width' => $data->{width},
181             'height' => $data->{height},
182             'depth' => $data->{depth},
183             'html' => $html
184             };
185 0           $self->book($bk);
186 0           $self->found(1);
187 0           return $self->book;
188             }
189              
190             q{currently listening to: 'Drunk In Public' by The Levellers};
191              
192             __END__