File Coverage

blib/lib/WWW/Wikipedia.pm
Criterion Covered Total %
statement 72 74 97.3
branch 16 20 80.0
condition 10 13 76.9
subroutine 16 16 100.0
pod 7 7 100.0
total 121 130 93.0


line stmt bran cond sub pod time code
1             package WWW::Wikipedia;
2              
3 11     11   128148 use 5.006;
  11         35  
4 11     11   68 use strict;
  11         22  
  11         191  
5 11     11   47 use warnings;
  11         21  
  11         279  
6 11     11   50 use Carp qw( croak );
  11         18  
  11         478  
7 11     11   3887 use URI::Escape ();
  11         15403  
  11         232  
8 11     11   2834 use WWW::Wikipedia::Entry;
  11         29  
  11         312  
9              
10 11     11   57 use base qw( LWP::UserAgent );
  11         19  
  11         5805  
11              
12             our $VERSION = '2.05';
13              
14 11         553 use constant WIKIPEDIA_URL =>
15 11     11   331151 'http://%s.wikipedia.org/w/index.php?title=%s&action=raw';
  11         29  
16 11         5368 use constant WIKIPEDIA_RAND_URL =>
17 11     11   94 'http://%s.wikipedia.org/wiki/Special:Random';
  11         24  
18              
19             =head1 NAME
20              
21             WWW::Wikipedia - Automated interface to the Wikipedia
22              
23             =head1 SYNOPSIS
24              
25             use WWW::Wikipedia;
26             my $wiki = WWW::Wikipedia->new();
27              
28             ## search for 'perl'
29             my $result = $wiki->search( 'perl' );
30              
31             ## if the entry has some text print it out
32             if ( $result->text() ) {
33             print $result->text();
34             }
35              
36             ## list any related items we can look up
37             print join( "\n", $result->related() );
38              
39             =head1 DESCRIPTION
40              
41             WWW::Wikipedia provides an automated interface to the Wikipedia
42             L, which is a free, collaborative, online
43             encyclopedia. This module allows you to search for a topic and return the
44             resulting entry. It also gives you access to related topics which are also
45             available via the Wikipedia for that entry.
46              
47              
48             =head1 INSTALLATION
49              
50             To install this module type the following:
51              
52             perl Makefile.PL
53             make
54             make test
55             make install
56              
57             =head1 METHODS
58              
59             =head2 new()
60              
61             The constructor. You can pass it a two letter language code, or nothing
62             to let it default to 'en'.
63              
64             ## Default: English
65             my $wiki = WWW::Wikipedia->new();
66              
67             ## use the French wiki instead
68             my $wiki = WWW::Wikipedia->new( language => 'fr' );
69              
70             WWW::Wikipedia is a subclass of LWP::UserAgent. If you would
71             like to have more control over the user agent (control timeouts, proxies ...)
72             you have full access.
73              
74             ## set HTTP request timeout
75             my $wiki = WWW::Wikipedia->new();
76             $wiki->timeout( 2 );
77              
78              
79             You can turn off the following of wikipedia redirect directives by passing
80             a false value to C.
81              
82             Together with the Wiki markup, some entries include HTML tags.
83             They can be stripped out using the C option:
84              
85             my $wiki = WWW::Wikipedia->new( clean_html => 1 );
86              
87             See C documentation below for details.
88              
89             =cut
90              
91             sub new {
92 12     12 1 852 my ( $class, %opts ) = @_;
93              
94 12   100     83 my $language = delete $opts{ language } || 'en';
95 12         37 my $follow = delete $opts{ follow_redirects };
96 12 50       43 $follow = 1 if !defined $follow;
97 12   100     107 my $clean_html = delete $opts{ clean_html } || 0;
98              
99 12         108 my $self = LWP::UserAgent->new( %opts );
100 12         21270 $self->agent( 'WWW::Wikipedia' );
101 12   33     911 bless $self, ref( $class ) || $class;
102              
103 12         96 $self->language( $language );
104 12         118 $self->follow_redirects( $follow );
105 12         44 $self->clean_html( $clean_html );
106 12         71 $self->parse_head( 0 );
107 12         1048 return $self;
108             }
109              
110             =head2 language()
111              
112             This allows you to get and set the language you want to use. Two letter
113             language codes should be used. The default is 'en'.
114              
115             my $wiki = WWW::Wikipedia->new( language => 'es' );
116            
117             # Later on...
118             $wiki->language( 'fr' );
119              
120             =cut
121              
122             sub language {
123 30     30 1 2295 my ( $self, $language ) = @_;
124 30 100       142 $self->{ language } = $language if $language;
125 30         132 return $self->{ language };
126             }
127              
128              
129             =head2 clean_html()
130              
131             Allows you to get/set if HTML is being stripped out.
132              
133             # set HTML strip
134             $wiki->clean_html( 1 );
135              
136             This option removes all tags and attributes they might have.
137             Their contents, however, is maintained (for now). Comments are
138             also removed.
139              
140             =cut
141              
142             sub clean_html {
143 12     12 1 32 my ( $self, $bool ) = @_;
144 12 50       50 $self->{ clean_html } = $bool if defined $bool;
145 12         26 return $self->{ clean_html };
146             }
147              
148             =head2 follow_redirects()
149              
150             By default, wikipeda redirect directives are followed. Set this to false to
151             turn that off.
152              
153             =cut
154              
155             sub follow_redirects {
156 23     23 1 593 my ( $self, $value ) = @_;
157 23 100       108 $self->{ follow_redirects } = $value if defined $value;
158 23         110 return $self->{ follow_redirects };
159             }
160              
161             =head2 search()
162              
163             Which performs the search and returns a WWW::Wikipedia::Entry object which
164             you can query further. See WWW::Wikipedia::Entry docs for more info.
165              
166             $entry = $wiki->search( 'Perl' );
167             print $entry->text();
168              
169             If there's a problem connecting to Wikipedia, C will be returned and the
170             error message will be stored in C.
171              
172             =cut
173              
174             sub search {
175 11     11 1 2653 my ( $self, $string ) = @_;
176              
177 11         51 $self->error( undef );
178              
179 11 50       50 croak( "search() requires you pass in a string" ) if !defined( $string );
180            
181 11 100       92 my $enc_string = utf8::is_utf8( $string )
182             ? URI::Escape::uri_escape_utf8( $string )
183             : URI::Escape::uri_escape( $string );
184 11         383 my $src = sprintf( WIKIPEDIA_URL, $self->language(), $enc_string );
185              
186 11         74 my $response = $self->get( $src );
187 11 100       4434511 if ( $response->is_success() ) {
188             my $entry = WWW::Wikipedia::Entry->new( $response->decoded_content(), $src,
189 10         196 clean_html => $self->{ clean_html } );
190              
191             # look for a wikipedia style redirect and process if necessary
192             # try to catch self-redirects
193 10 100 100     72 return $self->search( $1 )
      66        
194             if $self->follow_redirects && $entry->raw() =~ /^#REDIRECT\s*\[\[([^|\]]+)/is && $1 ne $string;
195              
196 9         630 return ( $entry );
197             }
198             else {
199 1         17 $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src );
200 1         8 return undef;
201             }
202              
203             }
204              
205             =head2 random()
206              
207             This method fetches a random wikipedia page.
208              
209             =cut
210              
211             sub random {
212 1     1 1 765 my ( $self ) = @_;
213 1         3 my $src = sprintf( WIKIPEDIA_RAND_URL, $self->language() );
214 1         14 my $response = $self->get( $src );
215              
216 1 50       785043 if ( $response->is_success() ) {
217             # get the raw version of the current url
218 1         17 my( $title ) = $response->request->uri =~ m{\.org/wiki/(.+)$};
219 1         39 $src = sprintf( WIKIPEDIA_URL, $self->language(), $title );
220 1         7 $response = $self->get( $src );
221             return WWW::Wikipedia::Entry->new( $response->decoded_content(), $src,
222 1         251960 clean_html => $self->{ clean_html } );
223             }
224              
225 0         0 $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src );
226 0         0 return;
227             }
228              
229             =head2 error()
230              
231             This is a generic error accessor/mutator. You can retrieve any searching error
232             messages here.
233              
234             =cut
235              
236             sub error {
237 13     13 1 436 my $self = shift;
238              
239 13 100       48 if ( @_ ) {
240 12         35 $self->{ _ERROR } = shift;
241             }
242              
243 13         33 return $self->{ _ERROR };
244             }
245              
246             =head1 TODO
247              
248             =over 4
249              
250             =item *
251              
252             Be more specific on the HTML clean methodology. For now all tags are removed,
253             keeping only their contents. In the future the behaviour might change
254             accordingly with each specific tag.
255              
256             =item *
257              
258             Watch the development of Special:Export XML formatting, eg:
259             http://en.wikipedia.org/wiki/Special:Export/perl
260              
261             =back
262              
263             =head1 SEE ALSO
264              
265             =over 4
266              
267             =item * L
268              
269             =back
270              
271             =head1 REPOSITORY
272              
273             L
274              
275             =head1 AUTHORS
276              
277             Ed Summers Eehs@pobox.comE
278              
279             Brian Cassidy Ebricas@cpan.orgE
280              
281             =head1 COPYRIGHT AND LICENSE
282              
283             Copyright 2003-2017 by Ed Summers
284              
285             This library is free software; you can redistribute it and/or modify
286             it under the same terms as Perl itself.
287              
288             =cut
289              
290             1;