File Coverage

blib/lib/WWW/Search/Ebay.pm
Criterion Covered Total %
statement 42 461 9.1
branch 0 214 0.0
condition 0 72 0.0
subroutine 14 40 35.0
pod 5 5 100.0
total 61 792 7.7


!!gi);
line stmt bran cond sub pod time code
1              
2             # $Id: Ebay.pm,v 2.271 2015-09-13 14:28:54 Martin Exp $
3              
4             package WWW::Search::Ebay;
5              
6 1     1   13569 use strict;
  1         2  
  1         37  
7 1     1   4 use warnings;
  1         2  
  1         43  
8              
9             =head1 NAME
10              
11             WWW::Search::Ebay - backend for searching www.ebay.com
12              
13             =head1 SYNOPSIS
14              
15             use WWW::Search;
16             my $oSearch = new WWW::Search('Ebay');
17             my $sQuery = WWW::Search::escape_query("C-10 carded Yakface");
18             $oSearch->native_query($sQuery);
19             while (my $oResult = $oSearch->next_result())
20             { print $oResult->url, "\n"; }
21              
22             =head1 DESCRIPTION
23              
24             This class is a Ebay specialization of L.
25             It handles making and interpreting Ebay searches
26             F.
27              
28             This class exports no public interface; all interaction should
29             be done through L objects.
30              
31             =head1 NOTES
32              
33             The search is done against CURRENT running AUCTIONS only.
34             (NOT completed auctions, NOT eBay Stores items, NOT Buy-It-Now only items.)
35             (If you want to search completed auctions, use the L module.)
36             (If you want to search eBay Stores, use the L module.)
37              
38             The query is applied to TITLES only.
39              
40             This module can return only the first 200 results matching your query.
41              
42             In the resulting L objects, the description()
43             field consists of a human-readable combination (joined with
44             semicolon-space) of the Item Number; number of bids; and high bid
45             amount (or starting bid amount).
46              
47             In the resulting L objects, the end_date() field
48             contains a human-readable DTG of when the auction is scheduled to end
49             (in the form "YYYY-MM-DD HH:MM TZ"). If environment variable TZ is
50             set, the time will be converted to that timezone; otherwise the time
51             will be left in ebay.com's default timezone (US/Pacific).
52              
53             In the resulting L objects, the bid_count() field
54             contains the number of bids as an integer.
55              
56             In the resulting L objects, the bid_amount()
57             field is a string containing the high bid or starting bid as a
58             human-readable monetary value in seller-native units, e.g. "$14.95" or
59             "GBP 6.00".
60              
61             In the resulting L objects, the sold() field will
62             be non-zero if the item has already sold. (Only if you're using
63             WWW::Search::Ebay::Completed)
64              
65             After a successful search, your search object will contain an element
66             named 'categories' which will be a reference to an array of hashes
67             containing names and IDs of categories and nested subcategories, and
68             the count of items matching your query in each category and
69             subcategory. (Special thanks to Nick Lokkju for this code!) For
70             example:
71              
72             $oSearch->{categories} = [
73             {
74             'ID' => '1',
75             'Count' => 19,
76             'Name' => 'Collectibles',
77             'Subcategory' => [
78             {
79             'ID' => '13877',
80             'Count' => 11,
81             'Name' => 'Historical Memorabilia'
82             },
83             {
84             'ID' => '11450',
85             'Count' => 1,
86             'Name' => 'Clothing, Shoes & Accessories'
87             },
88             ]
89             },
90             {
91             'ID' => '281',
92             'Count' => 1,
93             'Name' => 'Jewelry & Watches',
94             }
95             ];
96              
97             If your query string happens to be an eBay item number,
98             (i.e. if ebay.com redirects the query to an auction page),
99             you will get back one WWW::Search::Result without bid or price information.
100              
101             =head1 OPTIONS
102              
103             =over
104              
105             =item Limit search by price range
106              
107             Contributed by Brian Wilson:
108              
109             $oSearch->native_query($sQuery, {
110             _mPrRngCbx=>'1', _udlo=>$minPrice, _udhi=>$maxPrice,
111             } );
112              
113             =back
114              
115             =head1 PUBLIC METHODS OF NOTE
116              
117             =over
118              
119             =cut
120              
121 1     1   13 use base 'WWW::Search';
  1         2  
  1         1021  
122              
123 1     1   144047 use constant DEBUG_DATES => 0;
  1         3  
  1         51  
124 1     1   4 use constant DEBUG_COLUMNS => 0;
  1         2  
  1         38  
125              
126 1     1   5 use Carp ();
  1         1  
  1         15  
127 1     1   26 use CGI;
  1         2  
  1         7  
128 1     1   1015 use Data::Dumper; # for debugging only
  1         6630  
  1         67  
129 1     1   820 use Date::Manip;
  1         147056  
  1         182  
130             # Date_Init("setdate=now,America/Los_Angeles");
131 1     1   11 use HTML::TreeBuilder;
  1         3  
  1         16  
132 1     1   917 use LWP::Simple;
  1         8380  
  1         7  
133 1     1   389 use WWW::Search qw( generic_option strip_tags );
  1         2  
  1         74  
134             # We need the version that has the sold() method:
135 1     1   731 use WWW::SearchResult 2.072;
  1         15840  
  1         33  
136 1     1   884 use WWW::Search::Result;
  1         208  
  1         6488  
137              
138             our
139             $VERSION = 2.272;
140             our $MAINTAINER = 'Martin Thurn ';
141             my $cgi = new CGI;
142              
143             sub _native_setup_search
144             {
145 0     0     my ($self, $native_query, $rhOptsArg) = @_;
146              
147             # Set some private variables:
148 0   0       $self->{_debug} ||= $rhOptsArg->{'search_debug'};
149 0 0         $self->{_debug} = 2 if ($rhOptsArg->{'search_parse_debug'});
150 0   0       $self->{_debug} ||= 0;
151              
152 0           my $DEFAULT_HITS_PER_PAGE = 200;
153 0           $self->{'_hits_per_page'} = $DEFAULT_HITS_PER_PAGE;
154              
155 0           $self->user_agent('non-robot');
156 0           $self->agent_name('Mozilla/5.0 (compatible; Mozilla/4.0; MSIE 6.0; Windows NT 5.1; Q312461)');
157              
158 0           $self->{'_next_to_retrieve'} = 0;
159 0           $self->{'_num_hits'} = 0;
160             # As of 2013-03-01 (probably much before that, but first time I
161             # looked at it in quite a while):

To use our basic experience

162             # which does not require JavaScript,
163             # href="http://www.ebay.com/sch/i.html?LH_Auction=1&_nkw=trinidad+tobago+flag&_armrs=1&_from=&_ipg=50&_jsoff=1">click
164             # here.

165 0   0       $self->{search_host} ||= 'http://www.ebay.com'; # as of 2013-03-01
166 0   0       $self->{search_host} ||= 'http://search.ebay.com';
167 0   0       $self->{search_path} ||= '/sch/i.html'; # as of 2013-03-01
168 0   0       $self->{search_path} ||= '/ws/search/SaleSearch';
169 0 0         if (!defined($self->{_options}))
170             {
171             # http://shop.ebay.com/items/_W0QQLHQ5fBINZ1?_nkw=trinidad+flag&_sacat=0&_fromfsb=&_trksid=m270.l1313&_odkw=burkina+faso+flag&_osacat=0
172             $self->{_options} = {
173             satitle => $native_query,
174             # Search AUCTIONS ONLY:
175             sasaleclass => 1,
176             # Display item number explicitly:
177             socolumnlayout => 2,
178             # Do not convert everything to US$:
179             socurrencydisplay => 1,
180             sorecordsperpage => $self->{_hits_per_page},
181             _ipg => $self->{_hits_per_page},
182             # Display absolute times, NOT relative times:
183 0           sotimedisplay => 0,
184             # Use the default columns, NOT anything the
185             # user may have customized (which would come
186             # through via cookies):
187             socustoverride => 1,
188             # Output basic HTML, not JavaScript:
189             _armrs => 1,
190             };
191             $self->{_options} = {
192             _nkw => $native_query,
193             _armrs => 1,
194             # Turn off JavaScript:
195             _jsoff => 1,
196             # Search AUCTIONS ONLY:
197             LH_Auction => 1,
198             _ipg => $self->{_hits_per_page},
199             # Which page are we on:
200             # _from => 2,
201             #
202 0           };
203             } # if
204 0 0         if (defined($rhOptsArg))
205             {
206             # Copy in new options.
207 0           foreach my $key (keys %$rhOptsArg)
208             {
209             # print STDERR " DDD inspecting option $key...";
210 0 0         if (WWW::Search::generic_option($key))
211             {
212             # print STDERR "promote & delete\n";
213 0 0         $self->{$key} = $rhOptsArg->{$key} if defined($rhOptsArg->{$key});
214 0           delete $rhOptsArg->{$key};
215             }
216             else
217             {
218             # print STDERR "copy\n";
219 0 0         $self->{_options}->{$key} = $rhOptsArg->{$key} if defined($rhOptsArg->{$key});
220             }
221             } # foreach
222             } # if
223             # Clear the list of results per category:
224 0           $self->{categories} = [];
225             # Finally, figure out the url.
226 0           $self->{_next_url} = $self->{'search_host'} . $self->{'search_path'} .'?'. $self->hash_to_cgi_string($self->{_options});
227             } # _native_setup_search
228              
229              
230             =item user_agent_delay
231              
232             Introduce a few-seconds delay to avoid overwhelming the server.
233              
234             =cut
235              
236             sub user_agent_delay
237             {
238 0     0 1   my $self = shift;
239             # return;
240 0           my $iSecs = int(3 + rand(3));
241 0 0         print STDERR " DDD sleeping $iSecs seconds...\n" if (0 < $self->{_debug});
242 0           sleep($iSecs);
243             } # user_agent_delay
244              
245              
246             =item need_to_delay
247              
248             Controls whether we do the delay or not.
249              
250             =cut
251              
252             sub need_to_delay
253             {
254 0     0 1   1;
255             } # need_to_delay
256              
257              
258             =item preprocess_results_page
259              
260             Grabs the eBay Official Time so that when we parse the DTG from the
261             HTML, we can convert / return exactly what eBay means for each one.
262              
263             =cut
264              
265             sub preprocess_results_page
266             {
267 0     0 1   my $self = shift;
268 0           my $sPage = shift;
269 0 0         if (25 < $self->{_debug})
270             {
271             # print STDERR Dumper($self->{response});
272             # For debugging:
273 0           print STDERR $sPage;
274 0           exit 88;
275             } # if
276 0   0       my $sTitle = $self->{response}->header('title') || '';
277 0           my $qrTitle = $self->_title_pattern;
278 0 0         if ($sTitle =~ m!$qrTitle!)
279             {
280             # print STDERR " DDD got a Title: ==$sTitle==\n";
281             # This search returned a single auction item page. We do not need
282             # to fetch eBay official time.
283             } # if
284             else
285             {
286             # Use the UserAgent object in $self to fetch the official ebay.com time:
287 0           $self->{_ebay_official_time} = 'now';
288             # my $sPageDate = get('http://cgi1.ebay.com/aw-cgi/eBayISAPI.dll?TimeShow') || '';
289 0   0       my $sPageDate = $self->http_request(GET => 'http://viv.ebay.com/ws/eBayISAPI.dll?EbayTime')->content || '';
290 0 0         if ($sPageDate eq '')
291             {
292 0           die " EEE could not fetch official eBay time";
293             }
294             else
295             {
296 0           my $tree = HTML::TreeBuilder->new;
297 0           $tree->utf8_mode('true');
298 0           $tree->parse($sPageDate);
299 0           $tree->eof;
300 0           my $s = $tree->as_text;
301             # print STDERR " DDD official time =====$s=====\n";
302 0 0         if ($s =~ m!The official eBay Time is now:(.+?(P[SD]T))\s*Pacific\s!i)
303             {
304 0           my ($sDateRaw, $sTZ) = ($1, $2);
305 0           DEBUG_DATES && print STDERR " DDD official time raw ==$sDateRaw==\n";
306             # Apparently, ParseDate() automatically converts to local timezone:
307 0           my $date = ParseDate($sDateRaw);
308 0           DEBUG_DATES && print STDERR " DDD official time cooked ==$date==\n";
309 0           $self->{_ebay_official_time} = $date;
310             } # if
311             } # else
312             } # else
313 0           return $sPage;
314             # Ebay used to send malformed HTML:
315             # my $iSubs = 0 + ($sPage =~ s!
316             # print STDERR " DDD deleted $iSubs extraneous tags\n" if 1 < $self->{_debug};
317             } # preprocess_results_page
318              
319             sub _cleanup_url
320             {
321 0     0     my $self = shift;
322 0   0       my $sURL = shift() || '';
323             # Make sure we don't return two different URLs for the same item:
324 0           $sURL =~ s!&rd=\d+!!;
325 0           $sURL =~ s!&category=\d+!!;
326 0           $sURL =~ s!&ssPageName=[A-Z0-9]+!!;
327 0           return $sURL;
328             } # _cleanup_url
329              
330             sub _format_date
331             {
332 0     0     my $self = shift;
333 0           return UnixDate(shift, '%Y-%m-%d %H:%M %Z');
334             } # _format_date
335              
336             sub _bidcount_as_text
337             {
338 0     0     my $self = shift;
339 0           my $hit = shift;
340 0   0       my $iBids = $hit->bid_count || 'no';
341 0           my $s = "$iBids bid";
342 0 0         $s .= 's' if ($iBids ne '1');
343 0           $s .= '; ';
344             } # _bidcount_as_text
345              
346             sub _bidamount_as_text
347             {
348 0     0     my $self = shift;
349 0           my $hit = shift;
350 0   0       my $iPrice = $hit->bid_amount || 'unknown';
351 0           my $sDesc = '';
352 0 0         $sDesc .= $hit->bid_count ? 'current' : 'starting';
353 0           $sDesc .= " bid $iPrice";
354             } # _bidamount_as_text
355              
356             sub _create_description
357             {
358 0     0     my $self = shift;
359 0           my $hit = shift;
360 0   0       my $iItem = $hit->item_number || 'unknown';
361 0   0       my $sWhen = shift() || 'current';
362             # print STDERR " DDD _c_d($iItem, $iBids, $iPrice, $sWhen)\n";
363 0           my $sDesc = "Item \043$iItem; ". $self->_bidcount_as_text($hit);
364 0           $sDesc .= $self->_bidamount_as_text($hit);
365 0           return $sDesc;
366             } # _create_description
367              
368             sub _parse_category
369             {
370 0     0     my $self = shift;
371 0           my $oTD = shift;
372 0 0         return -1 if ! ref $oTD;
373 0           my $oA = $oTD->look_down(_tag => 'a');
374 0 0         return -1 if ! ref $oA;
375 0 0         if (DEBUG_COLUMNS || (1 < $self->{_debug}))
376             {
377 0           my $s = $oA->as_HTML;
378 0           print STDERR " DDD TDcategory's A ===$s===\n";
379             } # if
380 0   0       my $sURL = $oA->attr('href') || q{};
381 0 0         if ($sURL =~ m/sibeleafcat=(\d+)/)
382             {
383 0           return $1;
384             } # if
385 0           return -1;
386             } # _parse_category
387              
388             sub _parse_price
389             {
390 0     0     my $self = shift;
391 0           my $oTDprice = shift;
392 0           my $hit = shift;
393 0 0         return 0 unless (ref $oTDprice);
394 0           my $s = $oTDprice->as_HTML;
395 0 0         if (DEBUG_COLUMNS || (1 < $self->{_debug}))
396             {
397 0           print STDERR " DDD try TDprice ===$s===\n";
398             } # if
399 0 0         if ($oTDprice->attr('class') =~ m'\bebcBid\b')
400             {
401             # If we see this, we must have been searching for Stores items
402             # but we ran off the bottom of the Stores item list and ran
403             # into the list of "other" items.
404 0           return 1;
405             # We could probably return 0 to abandon the rest of the page, but
406             # maybe just maybe we hit this because of a parsing glitch which
407             # might correct itself on the next TD.
408             } # if
409 0 0         if ($oTDprice->attr('class') !~ m'\b(ebcPr|prices|prc)\b')
410             {
411             # If we see this, we probably were searching for Store items
412             # but we ran off the bottom of the Store item list and ran
413             # into the list of Auction items.
414 0           return 0;
415             # There is a separate backend for searching Auction items!
416             } # if
417 0 0 0       if (
418             $oTDprice->look_down(_tag => 'span',
419             class => 'ebSold')
420             ||
421             $oTDprice->look_down(_tag => 'span',
422             class => 'bold bidsold')
423             )
424             {
425             # This item sold, even if it had no bids (i.e. Buy-It-Now)
426 0           $hit->sold(1);
427             } # if
428 0 0         if (my $oChild = $oTDprice->look_down(_tag => 'div',
429             itemprop => 'price'))
430             {
431             # As of 2013-03, we need to separate out the price and the bid:
432 0           $oTDprice = $oChild;
433             } # if
434 0           my $iPrice = $oTDprice->as_text;
435 0 0         print STDERR " DDD raw iPrice ===$iPrice===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug}));
436 0           $iPrice =~ s!£!GBP!;
437 0           $iPrice =~ s!\s*Trending.+!!;
438 0           $iPrice =~ s!\s*Was.+!!;
439             # Convert nbsp to regular space:
440 0           $iPrice =~ s!\240!\040!g;
441             # I don't know why there are sometimes weird characters in there:
442 0           $iPrice =~ s!Â!!g;
443 0           $iPrice =~ s!Â!!g;
444 0           my $currency = $self->_currency_pattern;
445 0           my $W = $self->whitespace_pattern;
446 0           $iPrice =~ s!($currency)$W*($currency)!$1 (Buy-It-Now for $2)!;
447 0 0         if ($iPrice =~ s/FREE\s+SHIPPING//i)
448             {
449 0           $hit->shipping('free');
450             } # if
451 0           $hit->bid_amount($iPrice);
452 0           return 1;
453             } # _parse_price
454              
455             sub _parse_bids
456             {
457 0     0     my $self = shift;
458 0           my $oTDbids = shift;
459 0           my $hit = shift;
460 0           my $iBids = 0;
461 0 0         if (ref $oTDbids)
462             {
463 0 0         if (my $oChild = $oTDbids->look_down(_tag => 'div',
464             class => 'bids'))
465             {
466             # As of 2013-03, we need to separate out the price and the bid:
467 0           $oTDbids = $oChild;
468             } # if
469 0           my $s = $oTDbids->as_HTML;
470 0 0         if (DEBUG_COLUMNS || (1 < $self->{_debug}))
471             {
472 0           print STDERR " DDD TDbids ===$s===\n";
473             } # if
474 0 0         if ($oTDbids->attr('class') !~ m'\b(ebcBid|bids)\b')
475             {
476             # If we see this, we probably were searching for Store items
477             # but we ran off the bottom of the Store item list and ran
478             # into the list of Auction items.
479 0           return 0;
480             # There is a separate backend for searching Auction items!
481             } # if
482 0 0         $iBids = 1 if ($oTDbids->as_text =~ m/SOLD/i);
483 0 0         $iBids = $1 if ($oTDbids->as_text =~ m/(\d+)/);
484 0           my $W = $self->whitespace_pattern;
485 0 0 0       if (
486             # Bid listed as hyphen means no bids:
487             ($iBids =~ m!\A$W*-$W*\Z!)
488             ||
489             # Bid listed as whitespace means no bids:
490             ($iBids =~ m!\A$W*\Z!)
491             )
492             {
493 0           $iBids = 0;
494             } # if
495             } # if
496 0 0         if ($iBids =~ m/NO/i)
497             {
498 0           $iBids = 0;
499             } # if
500 0   0       $iBids ||= 0;
501             # print STDERR " DDD setting bid_count to =$iBids=\n";
502 0           $hit->bid_count($iBids);
503 0           return 1;
504             } # _parse_bids
505              
506             sub _parse_shipping
507             {
508 0     0     my $self = shift;
509 0           my $oTD = shift;
510 0           my $hit = shift;
511 0 0         if ($oTD->attr('class') =~ m'\bebcCty\b')
512             {
513             # If we see this, we probably were searching for UK auctions
514             # but we ran off the bottom of the UK item list and ran
515             # into the list of international items.
516 0           return 0;
517             } # if
518 0 0         if (my $oChild = $oTD->look_down(_tag => 'span',
519             class => 'ship'))
520             {
521             # As of 2013-03, we need to separate out the price and the
522             # shipping for some flavors of eBay:
523 0           $oTD = $oChild;
524             } # if
525 0           my $iPrice = $oTD->as_text;
526             # I don't know why there are sometimes weird characters in there:
527 0           $iPrice =~ s!Â!!g;
528 0           $iPrice =~ s!Â!!g;
529 0 0         print STDERR " DDD raw shipping ===$iPrice===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug}));
530 0 0         if ($iPrice =~ m/FREE/i)
531             {
532 0           $iPrice = 0.00;
533             } # if
534 0 0         return 0 if ($iPrice !~ m/\d/);
535 0           $iPrice =~ s!£!GBP!;
536 0           $hit->shipping($iPrice);
537 0           return 1;
538             } # _parse_shipping
539              
540             sub _parse_skip
541             {
542 0     0     my $self = shift;
543 0           my $oTD = shift;
544 0           my $hit = shift;
545 0           return 1;
546             } # _parse_skip
547              
548             sub _parse_enddate
549             {
550 0     0     my $self = shift;
551 0           my $oTDdate = shift;
552 0           my $hit = shift;
553 0           my $sDate = 'unknown';
554 0           my ($s, $sDateTemp);
555 0 0         if (ref $oTDdate)
556             {
557 0           $sDateTemp = $oTDdate->as_text;
558 0           $s = $oTDdate->as_HTML;
559             } # if
560             else
561             {
562 0           $sDateTemp = $s = $oTDdate;
563             }
564 0 0         print STDERR " DDD TDdate ===$s===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug}));
565             # New version as of 2013-03:
566 0 0         if ($s =~ m/\bTIMEMS="(\d+)"/i)
567             {
568 0           $sDate = $1;
569 0           $sDate = $self->_format_date(ParseDate(q{epoch }. int($sDate/1000)));
570 0 0         print STDERR " DDD sDate =$sDate=\n" if (DEBUG_COLUMNS || (1 < $self->{_debug}));
571 0           $hit->end_date($sDate);
572             # For backward-compatibility:
573 0           $hit->change_date($sDate);
574 0           return 1;
575             }
576 0 0         if (ref($oTDdate))
577             {
578 0   0       my $sClass = $oTDdate->attr('class') || q{};
579 0 0         if ($sClass !~ m/\b(col3|ebcTim|ti?me)\b/)
580             {
581             # If we see this, we probably were searching for Buy-It-Now items
582             # but we ran off the bottom of the item list and ran into the list
583             # of Store items.
584 0           return 0;
585             # There is a separate backend for searching Store items!
586             } # if
587             } # if
588 0 0         print STDERR " DDD raw sDateTemp ===$sDateTemp===\n" if (DEBUG_DATES || (1 < $self->{_debug}));
589 0 0         if ($sDateTemp =~ m/---/)
590             {
591             # If we see this, we probably were searching for Buy-It-Now items
592             # but we ran off the bottom of the item list and ran into the list
593             # of Store items.
594 0           return 0;
595             # There is a separate backend for searching Store items!
596             } # if
597             # I don't know why there are sometimes weird characters in there:
598 0           $sDateTemp =~ s!Â!!g;
599 0           $sDateTemp =~ s!Â!!g;
600 0           $sDateTemp =~ s!
601             # Convert nbsp to regular space:
602 0           $sDateTemp =~ s!\240!\040!g;
603 0           $sDateTemp =~ s!Time\s+left:!!g;
604 0           $sDateTemp = $self->_process_date_abbrevs($sDateTemp);
605 0 0         print STDERR " DDD cooked sDateTemp ===$sDateTemp===\n" if (DEBUG_DATES || (1 < $self->{_debug}));
606 0 0         print STDERR " DDD official time =====$self->{_ebay_official_time}=====\n" if (DEBUG_DATES || (1 < $self->{_debug}));
607 0           my $date = DateCalc($self->{_ebay_official_time}, " + $sDateTemp");
608 0 0         print STDERR " DDD date ===$date===\n" if (DEBUG_DATES || (1 < $self->{_debug}));
609 0           $sDate = $self->_format_date($date);
610 0 0         print STDERR " DDD sDate ===$sDate===\n" if (DEBUG_DATES || (1 < $self->{_debug}));
611 0           $hit->end_date($sDate);
612             # For backward-compatibility:
613 0           $hit->change_date($sDate);
614 0           return 1;
615             } # _parse_enddate
616              
617              
618             =item result_as_HTML
619              
620             Given a WWW::SearchResult object representing an auction, formats it
621             human-readably with HTML.
622              
623             An optional second argument is the date format,
624             a string as specified for Date::Manip::UnixDate.
625             Default is '%Y-%m-%d %H:%M:%S'
626              
627             my $sHTML = $oSearch->result_as_HTML($oSearchResult, '%H:%M %b %E');
628              
629             =cut
630              
631             sub result_as_HTML
632             {
633 0     0 1   my $self = shift;
634 0 0         my $oSR = shift or return '';
635 0   0       my $sDateFormat = shift || q'%Y-%m-%d %H:%M:%S';
636 0   0       my $dateEnd = ParseDate($oSR->end_date) || q{};
637 0           my $iItemNum = $oSR->item_number;
638 0 0         my $sSold = $oSR->sold
639             ? $cgi->font({color=>'green'}, 'sold') .q{; }
640             : $cgi->font({color=>'red'}, 'not sold') .q{; };
641 0           my $sBids = $self->_bidcount_as_text($oSR);
642 0           my $sPrice = $self->_bidamount_as_text($oSR);
643 0           my $sEndedColor = 'green';
644 0           my $sEndedWord = 'ends';
645 0           my $dateNow = ParseDate('now');
646 0 0         print STDERR " DDD compare end_date ==$dateEnd==\n" if (DEBUG_DATES || (1 < $self->{_debug}));
647 0 0         print STDERR " DDD compare date_now ==$dateNow==\n" if (DEBUG_DATES || (1 < $self->{_debug}));
648 0 0         if (Date_Cmp($dateEnd, $dateNow) < 0)
649             {
650 0           $sEndedColor = 'red';
651 0           $sEndedWord = 'ended';
652             } # if
653 0           my $sEnded = $cgi->font({ color => $sEndedColor },
654             UnixDate($dateEnd,
655             qq"$sEndedWord $sDateFormat"));
656 0           my $s = $cgi->b(
657             $cgi->a({href => $oSR->url}, $oSR->title),
658             $cgi->br,
659             qq{$sEnded; $sSold$sBids$sPrice},
660             );
661 0           $s .= $cgi->br;
662 0           $s .= $cgi->font({size => -1},
663             $cgi->a({href => qq{http://cgi.ebay.com/ws/eBayISAPI.dll?MakeTrack&item=$iItemNum}}, 'watch this item in MyEbay'),
664             );
665             # Format the entire thing as Helvetica:
666 0           $s = $cgi->font({face => 'Arial, Helvetica'}, $s);
667 0           return $s;
668             } # result_as_HTML
669              
670              
671             =back
672              
673             =head1 METHODS TO BE OVERRIDDEN IN SUBCLASSING
674              
675             You only need to read about these if you are subclassing this module
676             (i.e. making a backend for another flavor of eBay search).
677              
678             =over
679              
680             =cut
681              
682              
683             =item _get_result_count_elements
684              
685             Given an HTML::TreeBuilder object,
686             return a list of HTML::Element objects therein
687             which could possibly contain the approximate result count verbiage.
688              
689             =cut
690              
691             sub _get_result_count_elements
692             {
693 0     0     my $self = shift;
694 0           my $tree = shift;
695 0           my @ao;
696 0           push @ao, $tree->look_down( # as of 2015-06
697             '_tag' => 'span',
698             class => 'listingscnt'
699             );
700 0           push @ao, $tree->look_down(
701             '_tag' => 'div',
702             class => 'fpcc'
703             );
704 0           push @ao, $tree->look_down(
705             '_tag' => 'div',
706             class => 'fpc'
707             );
708 0           push @ao, $tree->look_down(
709             # For basic search, as of 2013-03:
710             '_tag' => 'div',
711             class => 'clt'
712             );
713 0           push @ao, $tree->look_down(
714             '_tag' => 'div',
715             class => 'count'
716             );
717 0           push @ao, $tree->look_down(
718             '_tag' => 'div',
719             class => 'pageCaptionDiv'
720             );
721 0           push @ao, $tree->look_down( # for BySellerID as of 2010-07
722             '_tag' => 'div',
723             id => 'rsc'
724             );
725 0           return @ao;
726             } # _get_result_count_elements
727              
728              
729             =item _get_itemtitle_tds
730              
731             Given an HTML::TreeBuilder object,
732             return a list of HTML::Element objects therein
733             representing elements
734             which could possibly contain the HTML for result title and hotlink.
735              
736             =cut
737              
738             sub _get_itemtitle_tds
739             {
740 0     0     my $self = shift;
741 0           my $tree = shift;
742 0           my @ao = $tree->look_down(_tag => 'td',
743             class => 'details',
744             );
745 0           push @ao, $tree->look_down(_tag => 'td',
746             class => 'ebcTtl',
747             );
748 0           push @ao, $tree->look_down(_tag => 'td',
749             class => 'dtl', # This is for eBay auctions as of 2010-07
750             );
751             # This is for BuyItNow (thanks to Brian Wilson):
752 0           push @ao, $tree->look_down(_tag => 'td',
753             class => 'details ttl',
754             );
755 0           my $oDiv = $tree->look_down(_tag => 'div',
756             id => 'ResultSetItems',
757             );
758 0 0         if (ref $oDiv)
759             {
760 0           push @ao, $oDiv->look_down(_tag => 'td',
761             class => 'dtl dtlsp',
762             );
763 0           push @ao, $oDiv->look_down(_tag => 'h3',
764             class => 'lvtitle',
765             );
766             } # if
767 0           return @ao;
768             } # _get_itemtitle_tds
769              
770              
771             sub _parse_tree
772             {
773 0     0     my $self = shift;
774 0           my $tree = shift;
775 0 0         print STDERR " FFF Ebay::_parse_tree\n" if (1 < $self->{_debug});
776 0   0       my $sTitle = $self->{response}->header('title') || '';
777 0           my $qrTitle = $self->_title_pattern;
778             # print STDERR " DDD trying to match ==$sTitle== against ==$qrTitle==\n";
779 0 0         if ($sTitle =~ m!$qrTitle!)
780             {
781 0           my ($sTitle, $iItem, $sDateRaw) = ($1, $2, $3);
782 0           my $sDateCooked = $self->_format_date($sDateRaw);
783 0           my $hit = new WWW::Search::Result;
784 0           $hit->item_number($iItem);
785 0           $hit->end_date($sDateCooked);
786             # For backward-compatibility:
787 0           $hit->change_date($sDateCooked);
788 0           $hit->title($sTitle);
789 0           $hit->add_url($self->{response}->request->uri);
790 0           $hit->description($self->_create_description($hit));
791             # print Dumper($hit);
792 0           push(@{$self->{cache}}, $hit);
  0            
793 0           $self->{'_num_hits'}++;
794 0           $self->approximate_result_count(1);
795 0           return 1;
796             } # if
797              
798             # First, see if: there were zero results and eBay automatically did
799             # a spell-check and searched for other words (or searched for a
800             # subset of query terms):
801 0           my $oDIV = $tree->look_down(
802             _tag => 'div',
803             class => 'messages',
804             );
805 0 0         if (ref $oDIV)
806             {
807 0           my $sText = $oDIV->as_text;
808 0 0 0       if (
      0        
809             ($sText =~ m/0 results found for /)
810             &&
811             (
812             ($sText =~ m/ so we searched for /)
813             ||
814             ($sText =~ m/ so we removed keywords /)
815             )
816             )
817             {
818 0           $self->approximate_result_count(0);
819 0           return 0;
820             } # if
821             } # if
822              
823             # See if our query was completely replaced by a similar-spelling query:
824 0           my $oLI = $tree->look_down(_tag => 'li',
825             class => 'ebInf',
826             );
827 0 0         if (ref $oLI)
828             {
829 0 0         if ($oLI->as_text =~ m! keyword has been replaced !)
830             {
831 0           $self->approximate_result_count(0);
832 0           return 0;
833             } # if
834             } # if
835              
836             # See if our category-only query was replaced by a global query:
837 0           my $oP = $tree->look_down(_tag => 'p',
838             class => 'sm-md',
839             );
840 0 0         if (ref $oP)
841             {
842 0           my $s = $oP->as_text;
843 0 0 0       if (($s =~ m/0 results found in the/) && ($s =~ m/so we searched in all categories/))
844             {
845 0           return 0;
846             } # if
847             } # if
848              
849 0           my $iHits = 0;
850              
851             # The hit count is in one of these tags:
852 0           my @aoResultCountTagset = $self->_get_result_count_elements($tree);
853 0 0         if (scalar(@aoResultCountTagset) < 1)
854             {
855 0           warn " EEE no result_count_elements matched the HTML\n";
856             } # if
857             FONT:
858 0           foreach my $oFONT (@aoResultCountTagset)
859             {
860 0           my $qr = $self->_result_count_pattern;
861             print STDERR (" DDD result_count try ==",
862 0 0         $oFONT->as_text, "== against qr=$qr=\n") if (1 < $self->{_debug});
863 0 0         if ($oFONT->as_text =~ m!$qr!)
864             {
865 0           my $sCount = $1;
866 0 0         print STDERR " DDD matched ($sCount)\n" if (1 < $self->{_debug});
867             # Make sure it's an integer:
868 0           $sCount =~ s!,!!g;
869 0           $self->approximate_result_count(0 + $sCount);
870 0           last FONT;
871             } # if
872             } # foreach
873              
874 0 0         if ($self->approximate_result_count() < 1)
875             {
876 0           return $iHits;
877             } # if
878              
879             # Recursively parse the stats telling how many items were found in
880             # each category:
881 0           my $oUL = $tree->look_down(_tag => 'ul',
882             class => 'categories');
883 0   0       $self->{categories} ||= [];
884 0 0         $self->_parse_category_list($oUL, $self->{categories}) if ref($oUL);
885              
886             # First, delete all the results that came from spelling variations:
887 0           my $oDiv = $tree->look_down(_tag => 'div',
888             id => 'expSplChk',
889             );
890 0 0         if (ref $oDiv)
891             {
892             # print STDERR " DDD found a spell-check ===", $oDiv->as_text, "===\n";
893 0           $oDiv->detach;
894 0           $oDiv->delete;
895             } # if
896             # The list of matching items is in a table. The first column of the
897             # table is nothing but icons; the second column is the good stuff.
898 0           my @aoTD = $self->_get_itemtitle_tds($tree);
899 0 0         unless (@aoTD)
900             {
901 0 0         print STDERR " EEE did not find table of results\n" if $self->{_debug};
902             # use File::Slurp;
903             # write_file('no-results.html', $self->{response}->content);
904             } # unless
905 0           my $qrItemNum = qr{(\d{11,13})};
906             TD:
907 0           foreach my $oTDtitle (@aoTD)
908             {
909             # Sanity check:
910 0 0         next TD unless ref $oTDtitle;
911 0           my $sTDtitle = $oTDtitle->as_HTML;
912 0 0         print STDERR " DDD try TDtitle ===$sTDtitle===\n" if (1 < $self->{_debug});
913             # First A tag contains the url & title:
914 0           my $oA = $oTDtitle->look_down('_tag', 'a');
915 0 0         next TD unless ref $oA;
916             # This is needed for Ebay::UK to make sure we're looking at the right TD:
917 0   0       my $sTitle = $oA->as_text || '';
918 0 0         next TD if ($sTitle eq '');
919 0 0         print STDERR " DDD sTitle ===$sTitle===\n" if (1 < $self->{_debug});
920 0           my $oURI = URI->new($oA->attr('href'));
921             # next TD unless ($oURI =~ m!ViewItem!);
922 0 0         next TD if ($oURI !~ m!$qrItemNum!);
923 0           my $iItemNum = $1;
924 0 0         print STDERR " DDD iItemNum ===$iItemNum===\n" if (1 < $self->{_debug});
925 0           my $iCategory = -1;
926 0 0         $iCategory = $1 if ($oURI =~ m!QQcategoryZ(\d+)QQ!);
927 0 0         if ($oURI->as_string =~ m!QQitemZ(\d+)QQ!)
928             {
929             # Convert new eBay links to old reliable ones:
930             # $oURI->path('');
931 0           $oURI->path('/ws/eBayISAPI.dll');
932 0           $oURI->query("ViewItem&item=$1");
933             } # if
934 0           my $sURL = $oURI->as_string;
935 0           my $hit = new WWW::Search::Result;
936 0           $hit->add_url($self->_cleanup_url($sURL));
937 0           $hit->title($sTitle);
938 0           $hit->item_number($iItemNum);
939             # This is just to prevent undef warnings later on:
940 0           $hit->bid_count(0);
941             # The rest of the info about this item is in sister
  • elements
  • 942             # to the right:
    943 0           my @aoSibs = $oTDtitle->parent->look_down(_tag => q{li});
    944             # The parent itself is an
  • tag:
  • 945 0           shift @aoSibs;
    946 0 0         warn " DDD before loop, there are ", scalar(@aoSibs), " sibling TDs\n" if (1 < $self->{_debug});
    947             SIBLING_TD:
    948 0           while (my $oTDsib = shift @aoSibs)
    949             {
    950 0 0         next unless ref($oTDsib);
    951 0   0       my $sColumn = $oTDsib->attr('class') || q{};
    952 0           my $s = $oTDsib->as_HTML;
    953 0 0         if ($sColumn eq q{})
    954             {
    955 0 0         warn " WWW auction info sibling has no class ==$s==" if (DEBUG_COLUMNS || (1 < $self->{_debug}));
    956             } # if
    957 0 0         print STDERR " DDD looking at TD'$sColumn' ===$s===\n" if (DEBUG_COLUMNS || (1 < $self->{_debug}));
    958 0 0         if ($sColumn =~ m'price')
    959             {
    960 0 0         next TD unless $self->_parse_price($oTDsib, $hit);
    961             } # if
    962 0 0 0       if (($sColumn =~ m'bids') || ($sColumn =~ m'format'))
    963             {
    964             # It is not a fatal error if there are no bids (i.e. buy-it-now)
    965 0           $self->_parse_bids($oTDsib, $hit);
    966             }
    967 0 0         if ($sColumn =~ m'shipping')
    968             {
    969 0 0         next TD if ! $self->_parse_shipping($oTDsib, $hit);
    970             }
    971 0 0         if ($sColumn =~ m'end')
    972             {
    973 0 0         next TD if ! $self->_parse_enddate($oTDsib, $hit);
    974             }
    975 0 0         if ($sColumn =~ 'time')
    976             {
    977 0 0         next TD if ! $self->_parse_enddate($oTDsib, $hit);
    978             }
    979 0 0         if ($sColumn =~ m'country')
    980             {
    981             # This listing is from a country other than the base site
    982             # we're searching against. Throw it out:
    983 0           next TD;
    984             }
    985 0 0         if ($sColumn =~ m'extras')
    986             {
    987 0 0         if ($iCategory < 0)
    988             {
    989             # We haven't found this item's category. Look for it here:
    990 0           $iCategory = $self->_parse_category($oTDsib);
    991             } # if
    992             } # if 'extras'
    993             # Any other class="" value will cause the
  • to be ignored.
  • 994             } # while
    995 0           my $sDesc = $self->_create_description($hit);
    996 0           $hit->description($sDesc);
    997 0           $hit->category($iCategory);
    998             # Clean up / sanity check hit info:
    999 0           my ($enddate, $iBids);
    1000 0 0 0       if (
          0        
          0        
    1001             defined($enddate = $hit->end_date)
    1002             &&
    1003             defined($iBids = $hit->bid_count)
    1004             &&
    1005             (0 < $iBids) # Item got any bids
    1006             &&
    1007             (Date_Cmp($enddate, 'now') < 0) # Item is ended
    1008             )
    1009             {
    1010             # Item must have been sold!?!
    1011 0           $hit->sold(1);
    1012             } # if
    1013 0 0         print STDERR " DDD add hit to cache\n" if (1 < $self->{_debug});
    1014 0           push(@{$self->{cache}}, $hit);
      0            
    1015 0           $self->{'_num_hits'}++;
    1016 0           $iHits++;
    1017             # Delete this HTML element so that future searches go faster?
    1018 0           $oTDtitle->detach;
    1019 0           $oTDtitle->delete;
    1020             } # foreach TD
    1021              
    1022 0           undef $self->{_next_url};
    1023 0           if (0)
    1024             {
    1025             # AS OF 2008-11 THE NEXT LINK CAN NOT BE FOLLOWED FROM PERL CODE
    1026              
    1027             # Look for a NEXT link:
    1028             my @aoA = $tree->look_down('_tag' => 'a');
    1029             TRY_NEXT:
    1030             foreach my $oA (0, reverse @aoA)
    1031             {
    1032             next TRY_NEXT unless ref $oA;
    1033             print STDERR " DDD try NEXT A ===", $oA->as_HTML, "===\n" if (1 < $self->{_debug});
    1034             my $href = $oA->attr('href');
    1035             next TRY_NEXT unless $href;
    1036             # Looking backwards from the bottom of the page, if we get all the
    1037             # way to the item list, there must be no next button:
    1038             last TRY_NEXT if ($href =~ m!ViewItem!);
    1039             if ($oA->as_text eq $self->_next_text)
    1040             {
    1041             print STDERR " DDD got NEXT A ===", $oA->as_HTML, "===\n" if 1 < $self->{_debug};
    1042             my $sClass = $oA->attr('class') || '';
    1043             if ($sClass =~ m/disabled/i)
    1044             {
    1045             last TRY_NEXT;
    1046             } # if
    1047             $self->{_next_url} = $self->absurl($self->{_prev_url}, $href);
    1048             last TRY_NEXT;
    1049             } # if
    1050             } # foreach
    1051             } # if 0
    1052              
    1053             # All done with this page.
    1054 0           $tree->delete;
    1055 0           return $iHits;
    1056             } # _parse_tree
    1057              
    1058              
    1059             =item _parse_category_list
    1060              
    1061             Parses the Category list from the left side of the results page.
    1062             So far,
    1063             this method can handle every type of eBay search currently implemented.
    1064             If you find that it doesn't suit your needs,
    1065             please contact the author because it's probably just a tiny tweak that's needed.
    1066              
    1067             =cut
    1068              
    1069             sub _parse_category_list
    1070             {
    1071 0     0     my $self = shift;
    1072 0           my $oTree = shift;
    1073 0           my $ra = shift;
    1074 0           my $oUL = $oTree->look_down(_tag => 'ul');
    1075 0           my @aoLI = $oUL->look_down(_tag => 'li');
    1076             CATLIST_LI:
    1077 0           foreach my $oLI (@aoLI)
    1078             {
    1079 0           my %hash;
    1080 0 0         next CATLIST_LI unless ref($oLI);
    1081 0 0         if ($oLI->parent->same_as($oUL))
    1082             {
    1083 0           my $oA = $oLI->look_down(_tag => 'a');
    1084 0 0         next CATLIST_LI unless ref($oA);
    1085 0           my $oSPAN = $oLI->look_down(_tag => 'span');
    1086 0 0         next CATLIST_LI unless ref($oSPAN);
    1087 0           $hash{'Name'} = $oA->as_text;
    1088 0           $hash{'ID'} = $oA->{'href'};
    1089 0           $hash{'ID'} =~ /sacatZ([0-9]+)/;
    1090 0           $hash{'ID'} = $1;
    1091 0           my $i = $oSPAN->as_text;
    1092 0           $i =~ tr/0-9//cd;
    1093 0           $hash{'Count'} = $i;
    1094 0           push @{$ra}, \%hash;
      0            
    1095             } # if
    1096 0           my @aoUL = $oLI->look_down(_tag => 'ul');
    1097             CATLIST_UL:
    1098 0           foreach my $oUL (@aoUL)
    1099             {
    1100 0 0         next CATLIST_UL unless ref($oUL);
    1101 0 0         if($oUL->parent()->same_as($oLI))
    1102             {
    1103 0           $hash{'Subcategory'} = ();
    1104 0           $self->_parse_category_list($oLI, \@{$hash{'Subcategory'}});
      0            
    1105             } # if
    1106             } # foreach CATLIST_UL
    1107             } # foreach CATLIST_LI
    1108             } # _parse_category_list
    1109              
    1110              
    1111             =item _process_date_abbrevs
    1112              
    1113             Given a date string,
    1114             converts common abbreviations to their full words
    1115             (so that the string can be unambiguously parsed by Date::Manip).
    1116             For example,
    1117             in the default English, 'd' becomes 'days'.
    1118              
    1119             =cut
    1120              
    1121             sub _process_date_abbrevs
    1122             {
    1123 0     0     my $self = shift;
    1124 0           my $s = shift;
    1125 0           $s =~ s!d! days!;
    1126 0           $s =~ s!h! hours!;
    1127 0           $s =~ s!m! minutes!;
    1128 0           return $s;
    1129             } # _process_date_abbrevs
    1130              
    1131              
    1132             =item _next_text
    1133              
    1134             The text of the "Next" button, localized for a specific type of eBay backend.
    1135              
    1136             =cut
    1137              
    1138             sub _next_text
    1139             {
    1140 0     0     return 'Next';
    1141             } # _next_text
    1142              
    1143              
    1144             =item whitespace_pattern
    1145              
    1146             Return a qr// pattern to match whitespace your webpage's language.
    1147              
    1148             =cut
    1149              
    1150             sub whitespace_pattern
    1151             {
    1152             # A pattern to match HTML whitespace:
    1153 0     0 1   return qr{[\ \t\r\n\240]};
    1154             } # whitespace_pattern
    1155              
    1156             =item _currency_pattern
    1157              
    1158             Return a qr// pattern to match mentions of money in your webpage's language.
    1159             Include the digits in the pattern.
    1160              
    1161             =cut
    1162              
    1163             sub _currency_pattern
    1164             {
    1165 0     0     my $self = shift;
    1166             # A pattern to match all possible currencies found in USA eBay
    1167             # listings:
    1168 0           my $W = $self->whitespace_pattern;
    1169 0           return qr/(?:\$|C|EUR|GBP)$W*[0-9.,]+/;
    1170             } # _currency_pattern
    1171              
    1172              
    1173             =item _title_pattern
    1174              
    1175             Return a qr// pattern to match the webpage title in your webpage's language.
    1176             Add grouping parenthesis so that
    1177             $1 becomes the auction title,
    1178             $2 becomes the eBay item number, and
    1179             $3 becomes the end date.
    1180              
    1181             =cut
    1182              
    1183             sub _title_pattern
    1184             {
    1185 0     0     return qr{\A(.+?)\s+-\s+EBAY\s+\(ITEM\s+(\d+)\s+END\s+TIME\s+([^)]+)\)\Z}i; #
    1186             } # _title_pattern
    1187              
    1188              
    1189             =item _result_count_pattern
    1190              
    1191             Return a qr// pattern to match the result count in your webpage's language.
    1192             Include parentheses so that $1 becomes the number (with commas is OK).
    1193              
    1194             =cut
    1195              
    1196             sub _result_count_pattern
    1197             {
    1198 0     0     return qr'([0-9,]+)\s+(active\s+)?(listing|item|matche?|result)s?(\s+found)?';
    1199             } # _result_count_pattern
    1200              
    1201              
    1202             1;
    1203              
    1204             __END__