File Coverage

blib/lib/Yahoo/Search/Request.pm
Criterion Covered Total %
statement 49 133 36.8
branch 10 58 17.2
condition 3 8 37.5
subroutine 11 17 64.7
pod 9 10 90.0
total 82 226 36.2


line stmt bran cond sub pod time code
1             package Yahoo::Search::Request;
2 2     2   13 use strict;
  2         5  
  2         74  
3              
4 2     2   9031 use Yahoo::Search::Response;
  2         7  
  2         77  
5 2     2   1741 use Yahoo::Search::XML;
  2         6  
  2         70  
6 2     2   3196 use LWP::UserAgent;
  2         128744  
  2         77  
7 2     2   24 use HTTP::Request;
  2         4  
  2         45  
8 2     2   10 use URI;
  2         4  
  2         3719  
9              
10             our $VERSION = "20100614.1"; # just to make CPAN happy
11              
12              
13             my $have_XML_Simple; # undef means 'not yet tested'
14             sub _have_XML_Simple
15             {
16 0 0   0   0 if (not defined $have_XML_Simple) {
17             # test whether XML::Simple is installed
18 0 0       0 if (eval { require XML::Simple; 1 }) {
  0         0  
  0         0  
19 0         0 $have_XML_Simple = 1;
20             } else {
21 0         0 $have_XML_Simple = 0;
22             }
23             }
24 0         0 return $have_XML_Simple;
25             }
26              
27              
28             =head1 NAME
29              
30              
31             =head1 VERSION
32              
33             version 1.11.3
34             Yahoo::Search::Request -- Container object for a Yahoo! Search request.
35             (This package is included in, and automatically loaded by, the Yahoo::Search package.)
36              
37             =head1 Package Use
38              
39             You never need to C this package directly -- it is loaded
40             automatically by Yahoo::Search.
41              
42             =head1 Object Creation
43              
44             In practice, this class is generally not dealt with explicitly, but rather
45             implicitly via functions in Yahoo::Search such as C and C,
46             which build and use a C object under the hood.
47              
48             You also have access to the C object via C method of
49             resulting C and C objects.
50              
51             To be clear, C objects are created by the C method of a
52             Search Engine object (Yahoo::Search).
53              
54             =cut
55              
56             sub new
57             {
58 3     3 0 7 my $class = shift;
59 3         182 my %Args = @_;
60              
61             ##
62             ## Don't want to keep any arg that begins with '_' (e.g. _Url).
63             ##
64 3         14 for my $key (grep { /^_/ } keys %Args) {
  18         47  
65 0         0 delete $Args{$key};
66             }
67              
68 3         40 return bless \%Args, $class;
69             }
70              
71              
72             =head1 Methods
73              
74             A C object provides the following methods:
75              
76             =over 4
77              
78             =cut
79              
80             ###########################################################################
81              
82             =item $Request->Uri
83              
84             Returns the URI::http object representing the url fetched (or to be
85             fetched) from Yahoo's Search servers. The url is actually fetched when the
86             C object's C method is called.
87              
88             Note that this does I reflect the fact that a request is changed to a
89             POST when request is sufficiently large. Thus, there are times when the url
90             represented by the URI::http object returned is not actually fetchable from
91             the Yahoo! servers.
92              
93             =cut
94              
95             sub Uri
96             {
97 3     3 1 5 my $Request = shift; # self
98              
99 3 50       12 if (not $Request->{_Uri})
100             {
101             ##
102             ## Create the URI (action + query string)
103             ##
104 3         29 $Request->{_Uri} = URI->new($Request->{Action}, "http");
105 3         25272 $Request->{_Uri}->query_form(%{$Request->{Params}});
  3         54  
106             }
107 3         1389 return $Request->{_Uri};
108             }
109              
110              
111              
112             ###########################################################################
113              
114             =item $Request->Url
115              
116             Like the C method, but returns a string with the full url
117             fetched (or to be fetched).
118              
119             Note that this does I reflect the fact that a request is changed to a
120             POST when request is sufficiently large. Thus, there are times when the url
121             returned is not actually fetchable from the Yahoo! servers.
122              
123             =cut
124              
125             sub Url
126             {
127 3     3 1 7 my $Request = shift; # self
128 3         13 return $Request->Uri->as_string;
129             }
130              
131              
132              
133             ###########################################################################
134              
135             =item $Request->SearchSpace
136              
137             Returns the search space the request represents (I, I, etc.)
138              
139             =cut
140              
141              
142             sub SearchSpace
143             {
144 0     0 1 0 my $Request = shift; # self
145 0         0 return $Request->{Space}
146             }
147              
148              
149              
150             ###########################################################################
151              
152             =item $Request->SearchEngine
153              
154             Returns the Yahoo::Search "search engine" object used in creating this
155             request.
156              
157             =cut
158              
159             sub SearchEngine
160             {
161 3     3 1 5 my $Request = shift; # self
162 3         155 return $Request->{SearchEngine};
163             }
164              
165              
166              
167             ##
168             ## Some search spaces spaces have very simple data --
169             ## they are simple text phrases, and not further nested xml.
170             ##
171             my %SimpleResultSpace =
172             (
173             Spell => 1,
174             Related => 1,
175             Terms => 1,
176             );
177              
178              
179             ###########################################################################
180              
181             =item $Request->Fetch
182              
183             Actually contact the Yahoo Search servers, returning a C
184             (Yahoo::Search::Result) object.
185              
186             =cut
187              
188             our $UA;
189              
190             sub Fetch
191             {
192 3     3 1 7 my $Request = shift; # self
193             ## no other args
194              
195             ##
196             ## Fetch -- get the response (which contains xml, hopefully)
197             ##
198              
199 3 50       16 if (my $callback = $Request->SearchEngine->Default('PreRequestCallback'))
200             {
201 0 0       0 if (not $callback->($Request)) {
202 0   0     0 $@ ||= "aborted because PreRequestCallback returned false";
203 0         0 return ();
204             }
205             }
206              
207 3         56 $Yahoo::Search::RecentRequestUrl = $Request->Url;
208              
209 3 50       35 warn "Fetching url: $Yahoo::Search::RecentRequestUrl\n" if $Request->{Debug} =~ m/url/x;
210              
211             ## create the useragent object just the first time.
212 3   66     42 $UA ||= LWP::UserAgent->new(agent => "Yahoo::Search ($Yahoo::Search::VERSION)", env_proxy => 1);
213              
214 3         20565 my $response;
215              
216             ##
217             ## Yahoo! servers allow a GET until the GET line (including "GET" and
218             ## ending "\r\n" is 8192 bytes long. The following switches to POST
219             ## once it gets close. (To bring a GET pedantically up to the limit,
220             ## we'd have to switch to POST once what follows the '?' in the URL is
221             ## more than 8186 bytes, but there's really no reason to push right up
222             ## to the limit.)
223             ##
224 3 50       20 if (length($Yahoo::Search::RecentRequestUrl) < 8180) {
225 3         20 $response = $UA->get($Yahoo::Search::RecentRequestUrl);
226             } else {
227 0         0 $response = $UA->post($Request->{Action}, $Request->{Params});
228             }
229              
230             ##
231             ## Ensure we have a good result
232             ##
233 3 50       179238 if (not $response) {
234 0         0 $@ = "couldn't make request";
235 0         0 return ();
236             }
237              
238             ##
239             ## Nab (and if debugging, report) the xml
240             ##
241 3         14 my $xml = $response->content;
242 3 50       50 print $xml, "\n" if $Request->{Debug} =~ m/xml/x;
243 3 50       14 if ($Request->{Debug} =~ m/XMLtmp/) {
244 0         0 open XMLTMP, ">/tmp/XML";
245 0         0 print XMLTMP $xml;
246 0         0 close XMLTMP;
247             }
248              
249             ##
250             ## Even if the response is not successful, it may still be XML and may
251             ## have an error message in it.
252             ##
253 3 50       15 if (not $response->is_success)
254             {
255 3 50 33     83 if ($xml and $xml =~ m{(.+?)}s) {
    50          
256 0         0 $@ = "Bad Request: $1";
257             } elsif ($response->status_line) {
258 3         117 $@ = $response->status_line;
259             } else {
260 0         0 $@ = "ERROR"; ## unknown error
261             }
262 3         117 return ();
263             }
264              
265 0 0         if (not $xml) {
266 0           $@ = "empty response from Yahoo server";
267 0           return ();
268             }
269              
270             ##
271             ## Turn the XML into a Perl hash.
272             ##
273             ## If we're told to use XML::Simple, we'll do so directly.
274             ## Otherwise, we'll try our own mini (==fast) Yahoo::Search::XML. If it
275             ## can't grok the XML, we'll revert to XML::Simple, asking the user to
276             ## file a bug report....
277             ##
278             ## The following is more verbose than need be, but the more succinct
279             ## code is convoluted for little gain.
280             ##
281 0           my $ResultHash;
282 0 0         if ($Yahoo::Search::UseXmlSimple)
283             {
284 0 0         if (not _have_XML_Simple()) {
285 0           $@ = "\$Yahoo::Search::UseXmlSimple is true, but XML::Simple is not installed";
286 0           return ();
287             }
288              
289 0           $ResultHash = eval { XML::Simple::XMLin($xml) };
  0            
290 0 0         if (not $ResultHash) {
291 0           $@ = "Yahoo::Request: Error processing XML by XML::Simple: $@";
292 0           return ();
293             }
294             }
295             else
296             {
297             ## first try my mini parser
298 0           $ResultHash = eval { Yahoo::Search::XML::Parse($xml) };
  0            
299              
300 0 0         if (not $ResultHash)
301             {
302 0           my $orig_error = $@;
303              
304             ##
305             ## Give XML::Simple a chance, if it's there
306             ##
307 0 0         if (not _have_XML_Simple())
308             {
309 0           warn "Yahoo::Search::XML is having trouble with the XML returned from Yahoo ($orig_error); try installing XML::Simple and setting \$Yahoo::Search::UseXmlSimple to true, and filing a bug report.\n";
310 0           $@ = "Yahoo::Request: Error processing XML: $orig_error";
311 0           return ();
312             }
313              
314 0           $ResultHash = eval { XML::Simple::XMLin($xml) };
  0            
315              
316 0 0         if (not $ResultHash) {
317 0           $@ .= "Yahoo::Request: Error processing XML (even tried XML::Simple): $orig_error";
318 0           return ();
319             }
320             ##
321             ## XML::Simple could parse it, but Yahoo::Search::XML couldn't,
322             ## so it must be a bug with the former... )_:
323             ##
324 0           $Yahoo::Search::UseXmlSimple = 1;
325 0           warn "Yahoo::Search::XML is having trouble with the XML returned from Yahoo ($orig_error), so reverting to XML::Simple; suggest setting \$Yahoo::Search::UseXmlSimple to true and filing a bug report.\n";
326             }
327             }
328              
329              
330             ##
331             ## If there is only one result, $ResultHash->{Result} will be a hash
332             ## ref rather than the ref to an array of hash refs that we would
333             ## otherwise expect, so we'll fix that here.
334             ##
335 0 0         if (not exists $ResultHash->{Result}) {
    0          
336 0           $ResultHash->{Result} = [ ];
337             } elsif (ref($ResultHash->{Result}) ne "ARRAY") {
338 0           $ResultHash->{Result} = [ $ResultHash->{Result} ];
339             }
340              
341             ##
342             ## The mention of "hash ref" in the previous comment doesn't apply
343             ## to Spell and Related spaces -- let's fix that.
344             ##
345 0 0         if ($SimpleResultSpace{$Request->SearchSpace})
346             {
347 0           my @Results;
348 0           for my $item (@{ $ResultHash->{Result}}) {
  0            
349 0           push @Results, { Term => $item };
350             }
351 0           $ResultHash->{Result} = \@Results;
352              
353              
354             ##
355             ## These are not part of what's returned, but it makes it easier
356             ## for us if they're there, so fake'em.
357             ##
358 0 0         $ResultHash->{firstResultPosition} = @Results ? 1 : 0;
359 0           $ResultHash->{totalResultsAvailable} = scalar @Results;
360              
361             ##
362             ## Add this hint to the rest of the code to not allow
363             ## further requests (e.g. via AutoContinue).
364             ##
365 0           $ResultHash->{_NoFurtherRequests} = 1;
366             }
367              
368             ##
369             ## Report if needed.
370             ##
371 0 0         if ($Request->{Debug} =~ m/hash/x) {
372 0           require Data::Dumper;
373 0           local($Data::Dumper::Terse) = 1;
374 0           warn "Grokked Hash: ", Data::Dumper::Dumper($ResultHash), "\n";
375             }
376              
377 0           $ResultHash->{_Request} = $Request;
378 0           $ResultHash->{_XML} = $xml;
379              
380             ##
381             ## Create (and return) a new Response object from the request and the
382             ## returned hash.
383             ##
384 0           return Yahoo::Search::Response->new($ResultHash);
385             }
386              
387              
388              
389             ###########################################################################
390              
391             =item $Request->RelatedRequest
392              
393             =item $Request->RelatedResponse
394              
395             Perform a I request for search terms related to the query phrase
396             of the current request, returning the new C or C object,
397             respectively.
398              
399             Both return nothing if the current request is already for a I
400             search.
401              
402             =cut
403              
404              
405             sub RelatedRequest
406             {
407 0     0 1   my $Request = shift;
408              
409 0 0         if ($Request->SearchSpace eq "Related") {
410 0           return ();
411             } else {
412 0           return $Request->SearchEngine->Request(Related => $Request->{Params}->{query});
413             }
414             }
415              
416             sub RelatedResponse
417             {
418 0     0 1   my $Request = shift;
419 0 0         if (my $new = $Request->RelatedRequest) {
420 0           return $new->Fetch();
421             } else {
422 0           return ();
423             }
424             }
425              
426              
427             ###########################################################################
428              
429             =item $Request->SpellRequest
430              
431             =item $Request->SpellResponse
432              
433             Perform a I request for a search term that may reflect proper
434             spelling of the query phrase of the current request, returning the new
435             C or C object, respectively.
436              
437             Both return nothing if the current request is already for a I
438             search.
439              
440             =cut
441              
442              
443             sub SpellRequest
444             {
445 0     0 1   my $Request = shift;
446              
447 0 0         if ($Request->SearchSpace eq "Spell") {
448 0           return ();
449             } else {
450 0           return $Request->SearchEngine->Request(Spell => $Request->{Params}->{query});
451             }
452             }
453              
454             sub SpellResponse
455             {
456 0     0 1   my $Request = shift;
457 0 0         if (my $new = $Request->SpellRequest) {
458 0           return $new->Fetch();
459             } else {
460 0           return ();
461             }
462             }
463              
464              
465             =pod
466              
467             =back
468              
469             =head1 Author
470              
471             Jeffrey Friedl (jfriedl@yahoo.com)
472              
473             =cut
474              
475             1;