File Coverage

blib/lib/WWW/Search/Simple.pm
Criterion Covered Total %
statement 18 62 29.0
branch 0 18 0.0
condition 0 3 0.0
subroutine 6 8 75.0
pod n/a
total 24 91 26.3


line stmt bran cond sub pod time code
1              
2             package WWW::Search::Simple;
3              
4 1     1   2080 use strict;
  1         2  
  1         31  
5 1     1   6 use warnings;
  1         1  
  1         33  
6              
7             =head1 NAME
8              
9             WWW::Search::Simple - class for searching any web site
10              
11             =head1 SYNOPSIS
12              
13             require WWW::Search;
14             $search = new WWW::Search('Simple');
15              
16             =head1 DESCRIPTION
17              
18             This class is a specialization of WWW::Search for simple web based
19             search indices. It extracts all links from a given page.
20              
21             This class exports no public interface; all interaction should be done
22             through WWW::Search objects.
23              
24             Note that this module will probably get a lot of false hits.
25              
26             =head1 AUTHOR
27              
28             C is written by Paul Lindner,
29              
30             =head1 COPYRIGHT
31              
32             Copyright (c) 1997,98 by the United Nations Administrative Committee
33             on Coordination (ACC)
34              
35             All rights reserved.
36              
37             =cut
38              
39              
40 1     1   4 use base 'WWW::Search';
  1         2  
  1         92  
41              
42 1     1   6 use Carp ();
  1         2  
  1         17  
43 1     1   5 use HTML::TreeBuilder;
  1         2  
  1         13  
44 1     1   41 use WWW::SearchResult;
  1         2  
  1         630  
45              
46             my $debug = 0;
47              
48             sub _native_setup_search
49             {
50 0     0     my ($self, $native_query, $native_opt) = @_;
51 0           my ($native_url);
52 0           my ($default_native_url) = "http://www.itu.int/cgi-bin/SFgate?application=itu&database=local//usr/local/wais/WWW/www-pages&listenv=table&httppath=/usr/local/www-data/&httpprefix=/&tie=and&maxhits=%n&text=%s";
53 0 0         if (defined($native_opt))
54             {
55             #print "Got " . join(' ', keys(%$native_opt)) . "\n";
56             # Process options..
57             # Substitute query terms for %s...
58            
59 0 0 0       if ($self->{'search_url'} && $native_opt->{'search_args'})
60             {
61 0           $native_url = $native_opt->{'search_url'} . "?" . $native_opt->{'search_args'};
62             } # if
63             } # if
64 0 0         $native_url = $default_native_url if (!$native_url);
65 0           $native_url =~ s/%s/$native_query/g; # Substitute search terms...
66 0           $self->user_agent();
67 0           $self->{_next_to_retrieve} = 0;
68 0           $self->{_base_url} = $self->{_next_url} = $native_url;
69             } # _native_setup_search
70              
71             sub _native_retrieve_some
72             {
73 0     0     my ($self) = @_;
74 0           my ($hit) = ();
75 0           my ($hits_found) = 0;
76            
77             # fast exit if already done
78 0 0         return undef if (!defined($self->{_next_url}));
79              
80             # get some
81 0 0         print "GET " . $self->{_next_url} . "\n" if ($debug);
82             my($response) = $self->http_request($self->{search_method},
83 0           $self->{_next_url});
84            
85 0           $self->{response} = $response;
86 0 0         if (!$response->is_success)
87             {
88 0 0         print "Some problem\n" if ($debug);
89 0           return undef;
90             }
91              
92 0           my $score = 800;
93 0           my $results = $response->content();
94              
95 0           my($h) = new HTML::TreeBuilder;
96 0           $h->parse($results);
97 0           for (@{ $h->extract_links(qw(a)) })
  0            
98             {
99 0           my($link, $linkelem) = @$_;
100            
101 0           my($linkobj) = new URI::URL $link, $self->{_next_url};
102 0 0         print "Fixing $link\n" if ($debug);
103            
104 0           my($hit) = new WWW::SearchResult;
105 0           $hit->add_url($linkobj->abs->as_string());
106 0           $hit->title(join(' ',@{$linkelem->content}));
  0            
107 0           $hit->score($score);
108 0           $hit->normalized_score($score);
109 0 0         if ($hit->title !~ /HASH\(0x/)
110             {
111 0           $hits_found++;
112 0           push(@{$self->{cache}}, $hit);
  0            
113             } # if
114 0           $score = int ($score * .95);
115             } # for
116 0           $self->approximate_result_count($hits_found);
117 0           $self->{_next_url} = undef;
118 0           return($hits_found);
119             } # _native_retrieve_some
120              
121             1;
122