File Coverage

blib/lib/WWW/Wikipedia.pm
Criterion Covered Total %
statement 67 74 90.5
branch 11 20 55.0
condition 4 13 30.7
subroutine 16 16 100.0
pod 7 7 100.0
total 105 130 80.7


line stmt bran cond sub pod time code
1             package WWW::Wikipedia;
2              
3 10     10   191484 use 5.006;
  10         36  
4 10     10   58 use strict;
  10         18  
  10         370  
5 10     10   66 use warnings;
  10         19  
  10         389  
6 10     10   52 use Carp qw( croak );
  10         17  
  10         621  
7 10     10   7262 use URI::Escape ();
  10         14347  
  10         258  
8 10     10   4810 use WWW::Wikipedia::Entry;
  10         33  
  10         401  
9              
10 10     10   93 use base qw( LWP::UserAgent );
  10         19  
  10         11994  
11              
12             our $VERSION = '2.03';
13              
14 10         800 use constant WIKIPEDIA_URL =>
15 10     10   441063 'http://%s.wikipedia.org/w/index.php?title=%s&action=raw';
  10         29  
16 10         7708 use constant WIKIPEDIA_RAND_URL =>
17 10     10   60 'http://%s.wikipedia.org/wiki/Special:Random';
  10         19  
18              
19             =head1 NAME
20              
21             WWW::Wikipedia - Automated interface to the Wikipedia
22              
23             =head1 SYNOPSIS
24              
25             use WWW::Wikipedia;
26             my $wiki = WWW::Wikipedia->new();
27              
28             ## search for 'perl'
29             my $result = $wiki->search( 'perl' );
30              
31             ## if the entry has some text print it out
32             if ( $result->text() ) {
33             print $result->text();
34             }
35              
36             ## list any related items we can look up
37             print join( "\n", $result->related() );
38              
39             =head1 DESCRIPTION
40              
41             WWW::Wikipedia provides an automated interface to the Wikipedia
42             L, which is a free, collaborative, online
43             encyclopedia. This module allows you to search for a topic and return the
44             resulting entry. It also gives you access to related topics which are also
45             available via the Wikipedia for that entry.
46              
47              
48             =head1 INSTALLATION
49              
50             To install this module type the following:
51              
52             perl Makefile.PL
53             make
54             make test
55             make install
56              
57             =head1 METHODS
58              
59             =head2 new()
60              
61             The constructor. You can pass it a two letter language code, or nothing
62             to let it default to 'en'.
63              
64             ## Default: English
65             my $wiki = WWW::Wikipedia->new();
66              
67             ## use the French wiki instead
68             my $wiki = WWW::Wikipedia->new( language => 'fr' );
69              
70             WWW::Wikipedia is a subclass of LWP::UserAgent. If you would
71             like to have more control over the user agent (control timeouts, proxies ...)
72             you have full access.
73              
74             ## set HTTP request timeout
75             my $wiki = WWW::Wikipedia->new();
76             $wiki->timeout( 2 );
77              
78              
79             You can turn off the following of wikipedia redirect directives by passing
80             a false value to C.
81              
82             Together with the Wiki markup, some entries include HTML tags.
83             They can be stripped out using the C option:
84              
85             my $wiki = WWW::Wikipedia->new( clean_html => 1 );
86              
87             See C documentation bellow for details.
88              
89             =cut
90              
91             sub new {
92 10     10 1 859 my ( $class, %opts ) = @_;
93              
94 10   100     74 my $language = delete $opts{ language } || 'en';
95 10         24 my $follow = delete $opts{ follow_redirects };
96 10 50       40 $follow = 1 if !defined $follow;
97 10   50     78 my $clean_html = delete $opts{ clean_html } || 0;
98              
99 10         96 my $self = LWP::UserAgent->new( %opts );
100 10         26737 $self->agent( 'WWW::Wikipedia' );
101 10   33     664 bless $self, ref( $class ) || $class;
102              
103 10         45 $self->language( $language );
104 10         165 $self->follow_redirects( $follow );
105 10         36 $self->clean_html( $clean_html );
106 10         69 $self->parse_head( 0 );
107 10         848 return $self;
108             }
109              
110             =head2 language()
111              
112             This allows you to get and set the language you want to use. Two letter
113             language codes should be used. The default is 'en'.
114              
115             my $wiki = WWW::Wikipedia->new( language => 'es' );
116            
117             # Later on...
118             $wiki->language( 'fr' );
119              
120             =cut
121              
122             sub language {
123 22     22 1 1113 my ( $self, $language ) = @_;
124 22 100       127 $self->{ language } = $language if $language;
125 22         102 return $self->{ language };
126             }
127              
128              
129             =head2 clean_html()
130              
131             Allows to get/set if HTML is being stripped out.
132              
133             # set HTML strip
134             $wiki->clean_html( 1 );
135              
136             This option removes all tags and attributes they might have.
137             Their contents, however, is maintained (for now). Comments are
138             also removed.
139              
140             =cut
141              
142             sub clean_html {
143 10     10 1 21 my ( $self, $bool ) = @_;
144 10 50       67 $self->{ clean_html } = $bool if defined $bool;
145 10         25 return $self->{ clean_html };
146             }
147              
148             =head2 follow_redirects()
149              
150             By default, wikipeda redirect directives are followed. Set this to false to
151             turn that off.
152              
153             =cut
154              
155             sub follow_redirects {
156 10     10 1 26 my ( $self, $value ) = @_;
157 10 50       61 $self->{ follow_redirects } = $value if defined $value;
158 10         24 return $self->{ follow_redirects };
159             }
160              
161             =head2 search()
162              
163             Which performs the search and returns a WWW::Wikipedia::Entry object which
164             you can query further. See WWW::Wikipedia::Entry docs for more info.
165              
166             $entry = $wiki->search( 'Perl' );
167             print $entry->text();
168              
169             If there's a problem connecting to Wikipedia, C will be returned and the
170             error message will be stored in C.
171              
172             =cut
173              
174             sub search {
175 6     6 1 2880 my ( $self, $string ) = @_;
176              
177 6         27 $self->error( undef );
178              
179 6 50       20 croak( "search() requires you pass in a string" ) if !defined( $string );
180            
181 6 50       71 my $enc_string = utf8::is_utf8( $string )
182             ? URI::Escape::uri_escape_utf8( $string )
183             : URI::Escape::uri_escape( $string );
184 6         229 my $src = sprintf( WIKIPEDIA_URL, $self->language(), $enc_string );
185              
186 6         43 my $response = $self->get( $src );
187 6 50       5710363 if ( $response->is_success() ) {
188             my $entry = WWW::Wikipedia::Entry->new( $response->decoded_content(), $src,
189 0         0 clean_html => $self->{ clean_html } );
190              
191             # look for a wikipedia style redirect and process if necessary
192             # try to catch self-redirects
193 0 0 0     0 return $self->search( $1 )
      0        
194             if $self->follow_redirects && $entry->raw() =~ /^#REDIRECT\s*\[\[([^|\]]+)/is && $1 ne $string;
195              
196 0         0 return ( $entry );
197             }
198             else {
199 6         105 $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src );
200 6         164 return undef;
201             }
202              
203             }
204              
205             =head2 random()
206              
207             This method fetches a random wikipedia page.
208              
209             =cut
210              
211             sub random {
212 1     1 1 459 my ( $self ) = @_;
213 1         4 my $src = sprintf( WIKIPEDIA_RAND_URL, $self->language() );
214 1         17 my $response = $self->get( $src );
215              
216 1 50       152480 if ( $response->is_success() ) {
217             # get the raw version of the current url
218 0         0 my( $title ) = $response->request->uri =~ m{\.org/wiki/(.+)$};
219 0         0 $src = sprintf( WIKIPEDIA_URL, $self->language(), $title );
220 0         0 $response = $self->get( $src );
221             return WWW::Wikipedia::Entry->new( $response->decoded_content(), $src,
222 0         0 clean_html => $self->{ clean_html } );
223             }
224              
225 1         17 $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src );
226 1         21 return;
227             }
228              
229             =head2 error()
230              
231             This is a generic error accessor/mutator. You can retrieve any searching error
232             messages here.
233              
234             =cut
235              
236             sub error {
237 14     14 1 441 my $self = shift;
238              
239 14 100       61 if ( @_ ) {
240 13         34 $self->{ _ERROR } = shift;
241             }
242              
243 14         39 return $self->{ _ERROR };
244             }
245              
246             =head1 TODO
247              
248             =over 4
249              
250             =item *
251              
252             Be more specific on the HTML clean methodology. For now all tags are removed,
253             keeping only their contents. In the future the behaviour might change
254             accordingly with each specific tag.
255              
256             =item *
257              
258             Watch the development of Special:Export XML formatting, eg:
259             http://en.wikipedia.org/wiki/Special:Export/perl
260              
261             =back
262              
263             =head1 SEE ALSO
264              
265             =over 4
266              
267             =item * LWP::UserAgent
268              
269             =back
270              
271             =head1 REPOSITORY
272              
273             L
274              
275             =head1 AUTHORS
276              
277             Ed Summers Eehs@pobox.comE
278              
279             Brian Cassidy Ebricas@cpan.orgE
280              
281             =head1 COPYRIGHT AND LICENSE
282              
283             Copyright 2003-2015 by Ed Summers
284              
285             This library is free software; you can redistribute it and/or modify
286             it under the same terms as Perl itself.
287              
288             =cut
289              
290             1;