File Coverage

blib/lib/WWW/SearchResult.pm
Criterion Covered Total %
statement 73 77 94.8
branch 7 8 87.5
condition 2 4 50.0
subroutine 39 42 92.8
pod 36 36 100.0
total 157 167 94.0


line stmt bran cond sub pod time code
1             # SearchResult.pm
2             # by John Heidemann
3             # Copyright (C) 1996 by USC/ISI
4             # $Id: SearchResult.pm,v 2.78 2008-07-21 01:20:30 Martin Exp $
5             #
6             # Copyright (c) 1996 University of Southern California.
7             # All rights reserved.
8             #
9             # Redistribution and use in source and binary forms are permitted
10             # provided that the above copyright notice and this paragraph are
11             # duplicated in all such forms and that any documentation, advertising
12             # materials, and other materials related to such distribution and use
13             # acknowledge that the software was developed by the University of
14             # Southern California, Information Sciences Institute. The name of the
15             # University may not be used to endorse or promote products derived from
16             # this software without specific prior written permission.
17             #
18             # THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
19             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
20             # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
21              
22              
23             =head1 NAME
24              
25             WWW::SearchResult - class for results returned from WWW::Search
26              
27             =head1 SYNOPSIS
28              
29             require WWW::Search;
30             require WWW::SearchResult;
31             $search = new WWW::Search;
32             $search->native_query(WWW::Search::escape_query($query));
33             # Get first result:
34             $result = $search->next_result();
35              
36             =head1 DESCRIPTION
37              
38             A framework for returning the results of C<WWW::Search>.
39              
40             =head1 SEE ALSO
41              
42             L<WWW::Search>
43              
44             =head1 REQUIRED RESULTS
45              
46             The particular fields returned in a result are backend- (search
47             engine-) dependent. However, all search engines are required to
48             return a url and title. (This list may grow in the future.)
49              
50             =head1 METHODS AND FUNCTIONS
51              
52             =cut
53              
54             #####################################################################
55              
56             package WWW::SearchResult;
57              
58 3     3   6664 use strict;
  3         7  
  3         87  
59 3     3   19 use warnings;
  3         7  
  3         73  
60              
61 3     3   14 use CGI;
  3         6  
  3         21  
62 3     3   186 use base 'LWP::MemberMixin';
  3         15  
  3         3875  
63             our
64             $VERSION = do{ my @r = (q$Revision: 2.78 $ =~ /\d+/g); sprintf "%d."."%03d" x $#r, @r};
65              
66             =head2 new
67              
68             To create a new WWW::SearchResult, call
69              
70             $result = new WWW::SearchResult();
71              
72             =cut
73              
# Constructor.  Takes only the class name and returns a new, empty
# result object blessed into that class.
sub new
  {
  my $class = shift;
  # Start with a real (empty) URL list.  Assigning "()" to a hash
  # element stores undef, not an empty array, so use an anonymous
  # array reference instead.
  my $self = bless { urls => [] }, $class;
  return $self;
  } # new
81              
82             =head2 url
83              
84             Returns the primary URL. Note that there may be a list of urls, see
85             also methods C<urls> and C<add_url>. Nothing special is guaranteed
86             about the primary URL other than that it is the first one returned by
87             the back end.
88              
89             Every result is required to have at least one URL.
90              
91             =cut
92              
# Get or set the primary URL.
# With an argument, that value is put at the front of the 'urls' list.
# In both cases the first element of the list is returned.
sub url
  {
  my $self = shift;
  if (@_)
    {
    # Setter: the new URL becomes the primary (first) one.
    unshift @{ $self->{urls} }, $_[0];
    } # if an argument was given
  return $self->{urls}->[0];
  } # url
104              
# Generic getter/setter for list-valued attributes kept in $self->{$elem}.
#
# Getter (no further arguments): in list context returns the stored
# elements (an empty list if nothing has been stored); in scalar
# context returns whatever is stored, normally an array reference.
#
# Setter (one or more arguments): a reference as first argument is
# stored as-is; otherwise all arguments are copied into a fresh array.
# The setter always returns the stored reference.
sub _elem_array
  {
  my $self = shift;
  my $elem = shift;
  if (@_ < 1)
    {
    # No arguments: plain getter.  Guard the dereference so that a
    # never-set attribute yields an empty list in list context instead
    # of dying with "Can't use an undefined value as an ARRAY reference".
    my $stored = $self->{$elem};
    return wantarray ? @{ $stored || [] } : $stored;
    } # if
  if (ref($_[0]))
    {
    # Trying to assign an arrayref:
    $self->{$elem} = $_[0];
    }
  else
    {
    # Trying to set to a scalar (or list of scalars); make sure it's
    # an array even if they give one element:
    $self->{$elem} = [ @_ ];
    }
  # Always return array reference
  return $self->{$elem};
  } # _elem_array
129              
# Append one or more values to the list stored in $self->{$elem}.
# The result of the push (the new element count) is returned.
sub _add_elem_array
  {
  my ($self, $elem, @additions) = @_;
  # Works the same for one value or many:
  return push @{ $self->{$elem} }, @additions;
  } # _add_elem_array
137              
138              
139             =head2 add_url
140              
141             Add a URL to the list.
142              
143             =cut
144              
# Forwards to _add_elem_array() for the 'urls' list.
sub add_url
  {
  my $self = shift;
  return $self->_add_elem_array('urls', @_);
  } # add_url
146              
147             =head2 urls
148              
149             Return a reference to the list of urls.
150             There is also a primary URL (C<url>).
151              
152             =cut
153              
# Forwards to _elem_array() for the 'urls' list.
sub urls
  {
  my $self = shift;
  return $self->_elem_array('urls', @_);
  } # urls
155              
156             =head2 add_related_url
157              
158             Add a URL to the related_url list.
159              
160             =cut
161              
# Forwards to _add_elem_array() for the 'related_urls' list.
sub add_related_url
  {
  my $self = shift;
  return $self->_add_elem_array('related_urls', @_);
  } # add_related_url
163              
164             =head2 related_urls
165              
166             Return a reference to the list of related urls.
167              
168             =cut
169              
# Forwards to _elem_array() for the 'related_urls' list.
sub related_urls
  {
  my $self = shift;
  return $self->_elem_array('related_urls', @_);
  } # related_urls
171              
172             =head2 add_related_title
173              
174             Add a title to the list of related titles.
175              
176             =cut
177              
# Forwards to _add_elem_array() for the 'related_titles' list.
sub add_related_title
  {
  my $self = shift;
  return $self->_add_elem_array('related_titles', @_);
  } # add_related_title
179              
180             =head2 related_titles
181              
182             Return a reference to the list of related titles.
183              
184             =cut
185              
# Forwards to _elem_array() for the 'related_titles' list.
sub related_titles
  {
  my $self = shift;
  return $self->_elem_array('related_titles', @_);
  } # related_titles
187              
188              
189             =head2 title, description, score, change_date, index_date, size, raw
190              
191             Set or get attributes of the result.
192              
193             None of these attributes is guaranteed to be provided by
194             a given backend. If an attribute is not provided
195             its method will return C<undef>.
196              
197             Typical contents of these attributes:
198              
199             =over 8
200              
201             =item title
202              
203             The title of the hit result (typically that provided by the 'TITLE'
204             HTML tag).
205              
206             =cut
207              
# Forwards to _elem() using the 'title' key.
sub title
  {
  my $self = shift;
  return $self->_elem('title', @_);
  } # title
209              
210             =item description
211              
212             A brief description of the result, as provided (or not) by the search engine.
213             Often the first few sentences of the document.
214              
215             =cut
216              
# Forwards to _elem() using the 'description' key.
sub description
  {
  my $self = shift;
  return $self->_elem('description', @_);
  } # description
218              
219             =item source
220              
221             Source is either the base url for this result (as listed on the search
222             engine's results page) or another copy of the full url path of the
223             result. It might also indicate the source site name or address whence
224             the result came, for example, 'CNN' or 'http://www.cnn.com' if the
225             search result page said "found at CNN.com".
226              
227             This value is backend-specific; in fact very few backends set this
228             value.
229              
230             =cut
231              
# Forwards to _elem() using the 'source' key.
sub source
  {
  my $self = shift;
  return $self->_elem('source', @_);
  } # source
233              
234             =item add_sources
235              
236             Same meaning as source above, for adding sources in case there are
237             potentially multiple sources.
238              
239             =cut
240              
# Forwards to _add_elem_array() for the 'sources' list.
sub add_sources
  {
  my $self = shift;
  return $self->_add_elem_array('sources', @_);
  } # add_sources
242              
243             =item sources
244              
245             Returns a reference to the list of sources.
246              
247             =cut
248              
# Forwards to _elem_array() for the 'sources' list.
sub sources
  {
  my $self = shift;
  return $self->_elem_array('sources', @_);
  } # sources
250              
251             =item score
252              
253             A backend specific, numeric score of the search result.
254             The exact range of scores is search-engine specific.
255             Usually larger scores are better, but this is no longer required.
256             See normalized_score for a backend independent score.
257              
258             =cut
259              
# Forwards to _elem() using the 'score' key.
sub score
  {
  my $self = shift;
  return $self->_elem('score', @_);
  } # score
261              
262             =item normalized_score
263              
264             This is intended to be a backend-independent score of the search
265             result. The range of this score is between 0 and 1000. Higher values
266             indicate better quality results.
267              
268             This is not really implemented since no one has created a
269             backend-independent ranking algorithm.
270              
271             =cut
272              
# Forwards to _elem() using the 'normalized_score' key.
sub normalized_score
  {
  my $self = shift;
  return $self->_elem('normalized_score', @_);
  } # normalized_score
274              
275             =item change_date
276              
277             When the result was last changed. Typically this is the modification
278             time of the destination web page.
279              
280             =cut
281              
# Forwards to _elem() using the 'change_date' key.
sub change_date
  {
  my $self = shift;
  return $self->_elem('change_date', @_);
  } # change_date
283              
284             =item index_date
285              
286             When the search engine indexed the result.
287              
288             =cut
289              
# Forwards to _elem() using the 'index_date' key.
sub index_date
  {
  my $self = shift;
  return $self->_elem('index_date', @_);
  } # index_date
291              
292             =item size
293              
294             The approximate size of the result, in bytes. This is only an
295             approximation because search backends often report the size as
296             "18.4K"; the best we can do with that number is return it as the value
297             of 18.4 * 1024.
298              
299             =cut
300              
# Forwards to _elem() using the 'size' key.
sub size
  {
  my $self = shift;
  return $self->_elem('size', @_);
  } # size
302              
303             =item raw
304              
305             The raw HTML for the entire result. Raw should be exactly the raw
306             HTML for one entry. It should not include list or table setup
307             commands (like ul or table tags), but it may include list item or
308             table data commands (like li, tr, or td). Whether raw contains a list
309             entry, table row, br-separated lines, or plain text is search-engine
310             dependent. In fact, many backends do not even return it at all.
311              
312             =cut
313              
# Forwards to _elem() using the 'raw' key.
sub raw
  {
  my $self = shift;
  return $self->_elem('raw', @_);
  } # raw
315              
316             =item as_HTML
317              
318             Convert the search result to a human-readable form,
319             decorated with HTML for pretty-printing.
320              
321             =cut
322              
# Render this result as an HTML fragment: an anchor whose href is the
# primary URL and whose text is the title (or 'title unknown'), then a
# line break, then the description (or 'no description available').
# Returns the fragment as a string.
sub as_HTML
  {
  my $self = shift;
  # Direct class-method call instead of the indirect-object form
  # "new CGI", which Perl can mis-parse and which newer feature
  # bundles disable.
  my $cgi = CGI->new;
  # NOTE(review): title and description are inserted as-is, with no
  # escaping done here; whether they can contain markup depends on the
  # backend -- confirm before sending this to a browser.
  my $s = $cgi->a({
                   href => $self->url,
                  },
                  $self->title || 'title unknown',
                 );
  $s .= $cgi->br;
  $s .= $self->description || 'no description available';
  return $s;
  } # as_HTML
336              
337             =back
338              
339             =head2 Others
340              
341             More attributes of the result. Backend-specific.
342             Refer to the documentation of each backend for details.
343              
344             =over
345              
346             =item bid_amount
347              
348             =cut
349              
# Forwards to _elem() using the 'bid' key.
sub bid_amount
  {
  my $self = shift;
  return $self->_elem('bid', @_);
  } # bid_amount
351              
352             =item bid_count
353              
354             =cut
355              
# Forwards to _elem() using the 'bids' key.
sub bid_count
  {
  my $self = shift;
  return $self->_elem('bids', @_);
  } # bid_count
357              
358             =item bidder
359              
360             =cut
361              
# Forwards to _elem() using the 'bidder' key.
sub bidder
  {
  my $self = shift;
  return $self->_elem('bidder', @_);
  } # bidder
363              
364             =item category
365              
366             =cut
367              
# Forwards to _elem() using the 'category' key.
sub category
  {
  my $self = shift;
  return $self->_elem('category', @_);
  } # category
369              
370             =item company
371              
372             =cut
373              
# Forwards to _elem() using the 'company' key.
sub company
  {
  my $self = shift;
  return $self->_elem('company', @_);
  } # company
375              
376             =item end_date
377              
378             =cut
379              
# Forwards to _elem() using the 'end_date' key.
sub end_date
  {
  my $self = shift;
  return $self->_elem('end_date', @_);
  } # end_date
381              
382             =item image_url
383              
384             =cut
385              
# Forwards to _elem() using the 'image_url' key.
sub image_url
  {
  my $self = shift;
  return $self->_elem('image_url', @_);
  } # image_url
387              
388             =item item_number
389              
390             =cut
391              
# Forwards to _elem() using the 'item_number' key.
sub item_number
  {
  my $self = shift;
  return $self->_elem('item_number', @_);
  } # item_number
393              
394             =item location
395              
396             =cut
397              
# Forwards to _elem() using the 'location' key.
sub location
  {
  my $self = shift;
  return $self->_elem('location', @_);
  } # location
399              
400             =item question_count
401              
402             =cut
403              
# Forwards to _elem() using the 'question_count' key.
sub question_count
  {
  my $self = shift;
  return $self->_elem('question_count', @_);
  } # question_count
405              
406             =item seller
407              
408             =cut
409              
# Forwards to _elem() using the 'seller' key.
sub seller
  {
  my $self = shift;
  return $self->_elem('seller', @_);
  } # seller
411              
412             =item shipping
413              
414             =cut
415              
# Forwards to _elem() using the 'shipping' key.
sub shipping
  {
  my $self = shift;
  return $self->_elem('shipping', @_);
  } # shipping
417              
418             =item sold
419              
420             =cut
421              
# Forwards to _elem() using the 'sold' key.
sub sold
  {
  my $self = shift;
  return $self->_elem('sold', @_);
  } # sold
423              
424             =item start_date
425              
426             =cut
427              
# Forwards to _elem() using the 'start_date' key.
sub start_date
  {
  my $self = shift;
  return $self->_elem('start_date', @_);
  } # start_date
429              
430             =item thumb_url
431              
432             =cut
433              
# Forwards to _elem() using the 'thumb_url' key.
sub thumb_url
  {
  my $self = shift;
  return $self->_elem('thumb_url', @_);
  } # thumb_url
435              
436             =item watcher_count
437              
438             =cut
439              
# Get or set the watcher count.  Forwards to _elem() using its own
# 'watcher_count' key; it previously used the 'seller' key, so setting
# the watcher count overwrote the seller (and vice versa).
sub watcher_count
  {
  my $self = shift;
  return $self->_elem('watcher_count', @_);
  } # watcher_count
441              
442             =back
443              
444             =head1 AUTHOR
445              
446             WWW::SearchResult was written by John Heidemann.
447             WWW::SearchResult is maintained by Martin Thurn.
448              
449             =cut
450              
451             1;
452              
453             __END__