File Coverage

blib/lib/WWW/Search/Scrape.pm
Criterion Covered Total %
statement 41 46 89.1
branch 13 16 81.2
condition 4 6 66.6
subroutine 9 9 100.0
pod 1 1 100.0
total 68 78 87.1


line stmt bran cond sub pod time code
1             package WWW::Search::Scrape;
2              
3 2     2   40744 use warnings;
  2         6  
  2         70  
4 2     2   13 use strict;
  2         3  
  2         65  
5              
6 2     2   2169 use Data::Dumper;
  2         20373  
  2         140  
7             #use Smart::Comments;
8              
9 2     2   19 use Carp;
  2         3  
  2         118  
10              
11 2     2   1173 use WWW::Search::Scrape::Google;
  2         9  
  2         73  
12 2     2   1440 use WWW::Search::Scrape::Bing;
  2         7  
  2         60  
13 2     2   1081 use WWW::Search::Scrape::Yahoo;
  2         6  
  2         103  
14              
15             =head1 NAME
16              
17             WWW::Search::Scrape - Scrape search engine results
18              
19             =head1 VERSION
20              
21             Version 0.08
22              
23             =cut
24              
25             our $VERSION = '0.08';
26              
27              
28             =head1 SYNOPSIS
29              
30              
31             use WWW::Search::Scrape qw/:all/;
32             my $result = search({engine => 'google', keyword =>'keywords', results => 10});
33             print "Google returns " . $result->{num} . " results\n";
34             print $_, "\n" foreach (@{$result->{results}});
35              
36             =head1 DESCRIPTION
37              
38             Most search engines do not provide a search API.
39              
40             Google finally shut down its search API in September 2009, although registration for it had already been disabled for years. The Google AJAX API is not powerful enough.
41              
42             The purpose of this module is to provide a simple interface for extracting the top search results from Google (as well as other search engines), and to keep this interface as simple as possible (and to provide it as soon as possible as well).
43              
44             Currently, it supports only English Google and Bing. I plan to add more functions soon.
45              
46             =head1 EXPORT
47              
48              
49             There is only one function in WWW::Search::Scrape -- search.
50              
51             =cut
52              
53 2     2   12 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
  2         6  
  2         813  
54              
55             require Exporter;
56              
57             our @ISA = qw(Exporter);
58             our %EXPORT_TAGS = ('all' => [qw/search/]);
59             our @EXPORT_OK = (@{$EXPORT_TAGS{all}});
60             our @EXPORT = qw();
61              
62             =head1 FUNCTIONS
63              
64             =head2 search
65              
66             search is the most important function in this module. It is used as a dispatcher for corresponding search engines -- Google, Yahoo, Bing etc.
67              
68             It accepts a config hash. Possible keys are,
69              
70             +---------+--------------------------------------------------------+
71             | engine | The name for the search engine, like 'google', 'bing' |
72             +---------+--------------------------------------------------------+
73             | keyword | The keyword(s) for the searching terms |
74             +---------+--------------------------------------------------------+
75             | results | How many results should be returned (required, > 0) |
76             +---------+--------------------------------------------------------+
77              
78              
79             It returns a hash ref with the following keys:
80              
81             +---------+-------------------------------------------------------------------------+
82             | num | How many items the search engine is able to return (estimated number) |
83             +---------+-------------------------------------------------------------------------+
84             | results | List of returned results |
85             +---------+-------------------------------------------------------------------------+
86              
87             =cut
88              
89             sub search (%) {
90 8     8 1 476 my ($q) = @_;
91             ### search got query: $q
92 8 50       37 unless (ref($q) eq "HASH")
93             {
94 0         0 carp 'search query should be hash';
95 0         0 return undef;
96             }
97              
98 8 100       24 unless ($q->{engine})
99             {
100 1         130 carp 'engine not set.';
101 1         89 return undef;
102             }
103              
104 7 100 66     50 unless ($q->{results} && $q->{results} >= 0) {
105 2         257 carp 'search results number not set.';
106 2         189 return undef;
107             }
108              
109 5 100 66     39 unless ($q->{keyword} && length($q->{keyword}) >= 1) {
110 1         223 carp 'search keyword not set.';
111 1         193 return undef;
112             }
113              
114 4 100       26 if (lc $q->{engine} eq 'google') {
    50          
115 3 50       10 if ($q->{geo_location}) {
116 0         0 $WWW::Search::Scrape::Google::geo_location = $q->{geo_location};
117             }
118 3 100       11 if ($q->{frontpage}) {
119 2         9 $WWW::Search::Scrape::Google::frontpage = $q->{frontpage};
120             }
121 3         17 return WWW::Search::Scrape::Google::search($q->{keyword}, $q->{results});
122             } elsif (lc $q->{engine} eq 'bing') {
123 1         8 return WWW::Search::Scrape::Bing::search($q->{keyword}, $q->{results});
124             }
125             else {
126 0           carp 'Search engine ' . $q->{engine} . ' not implemented yet.';
127 0           return undef;
128             }
129             }
130              
131             =head1 AUTHOR
132              
133             Quan Sun, C<< >>
134              
135             =head1 BUGS
136              
137             Please report any bugs or feature requests to C, or C, or through
138             the web interface at L. I will be notified, and then you'll
139             automatically be notified of progress on your bug as I make changes.
140              
141              
142             L
143              
144              
145             =head1 SUPPORT
146              
147             You can find documentation for this module with the perldoc command.
148              
149             perldoc WWW::Search::Scrape
150              
151              
152             You can also look for information at:
153              
154             =over 4
155              
156             =item * RT: CPAN's request tracker
157              
158             L
159              
160             =item * AnnoCPAN: Annotated CPAN documentation
161              
162             L
163              
164             =item * CPAN Ratings
165              
166             L
167              
168             =item * Search CPAN
169              
170             L
171              
172             =back
173              
174              
175             =head1 ACKNOWLEDGEMENTS
176              
177              
178             =head1 COPYRIGHT & LICENSE
179              
180             Copyright 2009 Quan Sun.
181              
182             This program is free software; you can redistribute it and/or modify it
183             under the terms of either: the GNU General Public License as published
184             by the Free Software Foundation; or the Artistic License.
185              
186             See http://dev.perl.org/licenses/ for more information.
187              
188              
189             =cut
190              
191             1; # End of WWW::Search::Scrape