File Coverage

blib/lib/WWW/Wikipedia.pm
Criterion Covered Total %
statement 72 74 97.3
branch 16 20 80.0
condition 9 13 69.2
subroutine 16 16 100.0
pod 7 7 100.0
total 120 130 92.3


line stmt bran cond sub pod time code
1             package WWW::Wikipedia;
2              
3 10     10   159507 use 5.006;
  10         35  
4 10     10   54 use strict;
  10         16  
  10         219  
5 10     10   55 use warnings;
  10         17  
  10         299  
6 10     10   46 use Carp qw( croak );
  10         14  
  10         617  
7 10     10   6739 use URI::Escape ();
  10         13169  
  10         226  
8 10     10   4324 use WWW::Wikipedia::Entry;
  10         29  
  10         351  
9              
10 10     10   50 use base qw( LWP::UserAgent );
  10         17  
  10         82387  
11              
12             our $VERSION = '2.04';
13              
14 10         656 use constant WIKIPEDIA_URL =>
15 10     10   590901 'http://%s.wikipedia.org/w/index.php?title=%s&action=raw';
  10         25  
16 10         6904 use constant WIKIPEDIA_RAND_URL =>
17 10     10   50 'http://%s.wikipedia.org/wiki/Special:Random';
  10         20  
18              
19             =head1 NAME
20              
21             WWW::Wikipedia - Automated interface to the Wikipedia
22              
23             =head1 SYNOPSIS
24              
25             use WWW::Wikipedia;
26             my $wiki = WWW::Wikipedia->new();
27              
28             ## search for 'perl'
29             my $result = $wiki->search( 'perl' );
30              
31             ## if the entry has some text print it out
32             if ( $result->text() ) {
33             print $result->text();
34             }
35              
36             ## list any related items we can look up
37             print join( "\n", $result->related() );
38              
39             =head1 DESCRIPTION
40              
41             WWW::Wikipedia provides an automated interface to the Wikipedia
42             L, which is a free, collaborative, online
43             encyclopedia. This module allows you to search for a topic and return the
44             resulting entry. It also gives you access to related topics which are also
45             available via the Wikipedia for that entry.
46              
47              
48             =head1 INSTALLATION
49              
50             To install this module type the following:
51              
52             perl Makefile.PL
53             make
54             make test
55             make install
56              
57             =head1 METHODS
58              
59             =head2 new()
60              
61             The constructor. You can pass it a two letter language code, or nothing
62             to let it default to 'en'.
63              
64             ## Default: English
65             my $wiki = WWW::Wikipedia->new();
66              
67             ## use the French wiki instead
68             my $wiki = WWW::Wikipedia->new( language => 'fr' );
69              
70             WWW::Wikipedia is a subclass of LWP::UserAgent. If you would
71             like to have more control over the user agent (control timeouts, proxies ...)
72             you have full access.
73              
74             ## set HTTP request timeout
75             my $wiki = WWW::Wikipedia->new();
76             $wiki->timeout( 2 );
77              
78              
79             You can turn off the following of wikipedia redirect directives by passing
80             a false value to C.
81              
82             Together with the Wiki markup, some entries include HTML tags.
83             They can be stripped out using the C option:
84              
85             my $wiki = WWW::Wikipedia->new( clean_html => 1 );
86              
87             See C documentation bellow for details.
88              
89             =cut
90              
91             sub new {
92 13     13 1 514 my ( $class, %opts ) = @_;
93              
94 13   100     92 my $language = delete $opts{ language } || 'en';
95 13         82 my $follow = delete $opts{ follow_redirects };
96 13 50       48 $follow = 1 if !defined $follow;
97 13   50     85 my $clean_html = delete $opts{ clean_html } || 0;
98              
99 13         112 my $self = LWP::UserAgent->new( %opts );
100 13         39033 $self->agent( 'WWW::Wikipedia' );
101 13   33     839 bless $self, ref( $class ) || $class;
102              
103 13         55 $self->language( $language );
104 13         186 $self->follow_redirects( $follow );
105 13         46 $self->clean_html( $clean_html );
106 13         67 $self->parse_head( 0 );
107 13         1122 return $self;
108             }
109              
110             =head2 language()
111              
112             This allows you to get and set the language you want to use. Two letter
113             language codes should be used. The default is 'en'.
114              
115             my $wiki = WWW::Wikipedia->new( language => 'es' );
116            
117             # Later on...
118             $wiki->language( 'fr' );
119              
120             =cut
121              
122             sub language {
123 31     31 1 2353 my ( $self, $language ) = @_;
124 31 100       162 $self->{ language } = $language if $language;
125 31         149 return $self->{ language };
126             }
127              
128              
129             =head2 clean_html()
130              
131             Allows to get/set if HTML is being stripped out.
132              
133             # set HTML strip
134             $wiki->clean_html( 1 );
135              
136             This option removes all tags and attributes they might have.
137             Their contents, however, is maintained (for now). Comments are
138             also removed.
139              
140             =cut
141              
142             sub clean_html {
143 13     13 1 28 my ( $self, $bool ) = @_;
144 13 50       63 $self->{ clean_html } = $bool if defined $bool;
145 13         32 return $self->{ clean_html };
146             }
147              
148             =head2 follow_redirects()
149              
150             By default, wikipeda redirect directives are followed. Set this to false to
151             turn that off.
152              
153             =cut
154              
155             sub follow_redirects {
156 24     24 1 670 my ( $self, $value ) = @_;
157 24 100       126 $self->{ follow_redirects } = $value if defined $value;
158 24         116 return $self->{ follow_redirects };
159             }
160              
161             =head2 search()
162              
163             Which performs the search and returns a WWW::Wikipedia::Entry object which
164             you can query further. See WWW::Wikipedia::Entry docs for more info.
165              
166             $entry = $wiki->search( 'Perl' );
167             print $entry->text();
168              
169             If there's a problem connecting to Wikipedia, C will be returned and the
170             error message will be stored in C.
171              
172             =cut
173              
174             sub search {
175 11     11 1 2642 my ( $self, $string ) = @_;
176              
177 11         43 $self->error( undef );
178              
179 11 50       39 croak( "search() requires you pass in a string" ) if !defined( $string );
180            
181 11 100       104 my $enc_string = utf8::is_utf8( $string )
182             ? URI::Escape::uri_escape_utf8( $string )
183             : URI::Escape::uri_escape( $string );
184 11         363 my $src = sprintf( WIKIPEDIA_URL, $self->language(), $enc_string );
185              
186 11         73 my $response = $self->get( $src );
187 11 100       4751669 if ( $response->is_success() ) {
188             my $entry = WWW::Wikipedia::Entry->new( $response->decoded_content(), $src,
189 10         219 clean_html => $self->{ clean_html } );
190              
191             # look for a wikipedia style redirect and process if necessary
192             # try to catch self-redirects
193 10 100 100     60 return $self->search( $1 )
      66        
194             if $self->follow_redirects && $entry->raw() =~ /^#REDIRECT\s*\[\[([^|\]]+)/is && $1 ne $string;
195              
196 9         8848 return ( $entry );
197             }
198             else {
199 1         12 $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src );
200 1         12 return undef;
201             }
202              
203             }
204              
205             =head2 random()
206              
207             This method fetches a random wikipedia page.
208              
209             =cut
210              
211             sub random {
212 1     1 1 438 my ( $self ) = @_;
213 1         3 my $src = sprintf( WIKIPEDIA_RAND_URL, $self->language() );
214 1         10 my $response = $self->get( $src );
215              
216 1 50       1302516 if ( $response->is_success() ) {
217             # get the raw version of the current url
218 1         27 my( $title ) = $response->request->uri =~ m{\.org/wiki/(.+)$};
219 1         46 $src = sprintf( WIKIPEDIA_URL, $self->language(), $title );
220 1         7 $response = $self->get( $src );
221             return WWW::Wikipedia::Entry->new( $response->decoded_content(), $src,
222 1         250919 clean_html => $self->{ clean_html } );
223             }
224              
225 0         0 $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src );
226 0         0 return;
227             }
228              
229             =head2 error()
230              
231             This is a generic error accessor/mutator. You can retrieve any searching error
232             messages here.
233              
234             =cut
235              
236             sub error {
237 13     13 1 414 my $self = shift;
238              
239 13 100       44 if ( @_ ) {
240 12         25 $self->{ _ERROR } = shift;
241             }
242              
243 13         31 return $self->{ _ERROR };
244             }
245              
246             =head1 TODO
247              
248             =over 4
249              
250             =item *
251              
252             Be more specific on the HTML clean methodology. For now all tags are removed,
253             keeping only their contents. In the future the behaviour might change
254             accordingly with each specific tag.
255              
256             =item *
257              
258             Watch the development of Special:Export XML formatting, eg:
259             http://en.wikipedia.org/wiki/Special:Export/perl
260              
261             =back
262              
263             =head1 SEE ALSO
264              
265             =over 4
266              
267             =item * LWP::UserAgent
268              
269             =back
270              
271             =head1 REPOSITORY
272              
273             L
274              
275             =head1 AUTHORS
276              
277             Ed Summers Eehs@pobox.comE
278              
279             Brian Cassidy Ebricas@cpan.orgE
280              
281             =head1 COPYRIGHT AND LICENSE
282              
283             Copyright 2003-2015 by Ed Summers
284              
285             This library is free software; you can redistribute it and/or modify
286             it under the same terms as Perl itself.
287              
288             =cut
289              
290             1;