| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::Wikipedia; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 10 |  |  | 10 |  | 159507 | use 5.006; | 
|  | 10 |  |  |  |  | 35 |  | 
| 4 | 10 |  |  | 10 |  | 54 | use strict; | 
|  | 10 |  |  |  |  | 16 |  | 
|  | 10 |  |  |  |  | 219 |  | 
| 5 | 10 |  |  | 10 |  | 55 | use warnings; | 
|  | 10 |  |  |  |  | 17 |  | 
|  | 10 |  |  |  |  | 299 |  | 
| 6 | 10 |  |  | 10 |  | 46 | use Carp qw( croak ); | 
|  | 10 |  |  |  |  | 14 |  | 
|  | 10 |  |  |  |  | 617 |  | 
| 7 | 10 |  |  | 10 |  | 6739 | use URI::Escape (); | 
|  | 10 |  |  |  |  | 13169 |  | 
|  | 10 |  |  |  |  | 226 |  | 
| 8 | 10 |  |  | 10 |  | 4324 | use WWW::Wikipedia::Entry; | 
|  | 10 |  |  |  |  | 29 |  | 
|  | 10 |  |  |  |  | 351 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 10 |  |  | 10 |  | 50 | use base qw( LWP::UserAgent ); | 
|  | 10 |  |  |  |  | 17 |  | 
|  | 10 |  |  |  |  | 82387 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our $VERSION = '2.04'; | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 10 |  |  |  |  | 656 | use constant WIKIPEDIA_URL => | 
| 15 | 10 |  |  | 10 |  | 590901 | 'http://%s.wikipedia.org/w/index.php?title=%s&action=raw'; | 
|  | 10 |  |  |  |  | 25 |  | 
| 16 | 10 |  |  |  |  | 6904 | use constant WIKIPEDIA_RAND_URL => | 
| 17 | 10 |  |  | 10 |  | 50 | 'http://%s.wikipedia.org/wiki/Special:Random'; | 
|  | 10 |  |  |  |  | 20 |  | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | =head1 NAME | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | WWW::Wikipedia - Automated interface to the Wikipedia | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | use WWW::Wikipedia; | 
| 26 |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new(); | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | ## search for 'perl' | 
| 29 |  |  |  |  |  |  | my $result = $wiki->search( 'perl' ); | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | ## if the entry has some text print it out | 
| 32 |  |  |  |  |  |  | if ( $result->text() ) { | 
| 33 |  |  |  |  |  |  | print $result->text(); | 
| 34 |  |  |  |  |  |  | } | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | ## list any related items we can look up | 
| 37 |  |  |  |  |  |  | print join( "\n", $result->related() ); | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | WWW::Wikipedia provides an automated interface to the Wikipedia | 
| 42 |  |  |  |  |  |  | L<http://www.wikipedia.org/>, which is a free, collaborative, online | 
| 43 |  |  |  |  |  |  | encyclopedia. This module allows you to search for a topic and return the | 
| 44 |  |  |  |  |  |  | resulting entry. It also gives you access to related topics that are | 
| 45 |  |  |  |  |  |  | available on Wikipedia for that entry. | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | =head1 INSTALLATION | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | To install this module type the following: | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | perl Makefile.PL | 
| 53 |  |  |  |  |  |  | make | 
| 54 |  |  |  |  |  |  | make test | 
| 55 |  |  |  |  |  |  | make install | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | =head1 METHODS | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | =head2 new() | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | The constructor. You can pass it a two letter language code, or nothing | 
| 62 |  |  |  |  |  |  | to let it default to 'en'. | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | ## Default: English | 
| 65 |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new(); | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | ## use the French wiki instead | 
| 68 |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new( language => 'fr' ); | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | WWW::Wikipedia is a subclass of LWP::UserAgent. If you would like | 
| 71 |  |  |  |  |  |  | more control over the user agent (timeouts, proxies, etc.) you have | 
| 72 |  |  |  |  |  |  | full access to its methods. | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | ## set HTTP request timeout | 
| 75 |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new(); | 
| 76 |  |  |  |  |  |  | $wiki->timeout( 2 ); | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | You can turn off the following of Wikipedia redirect directives by passing | 
| 80 |  |  |  |  |  |  | a false value to C<follow_redirects>. | 
| 81 |  |  |  |  |  |  |  | 
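|  |  |  |  |  |  |  | For example: | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | ## do not follow Wikipedia redirect pages | 
|  |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new( follow_redirects => 0 ); | 
|  |  |  |  |  |  |  |  | 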
| 82 |  |  |  |  |  |  | Together with the Wiki markup, some entries include HTML tags. They | 
| 83 |  |  |  |  |  |  | can be stripped out using the C<clean_html> option: | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new( clean_html => 1 ); | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | See the C<clean_html()> documentation below for details. | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | =cut | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | sub new { | 
| 92 | 13 |  |  | 13 | 1 | 514 | my ( $class, %opts ) = @_; | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 13 |  | 100 |  |  | 92 | my $language = delete $opts{ language } || 'en'; | 
| 95 | 13 |  |  |  |  | 82 | my $follow = delete $opts{ follow_redirects }; | 
| 96 | 13 | 50 |  |  |  | 48 | $follow = 1 if !defined $follow; | 
| 97 | 13 |  | 50 |  |  | 85 | my $clean_html = delete $opts{ clean_html } || 0; | 
| 98 |  |  |  |  |  |  |  | 
| 99 | 13 |  |  |  |  | 112 | my $self = LWP::UserAgent->new( %opts ); | 
| 100 | 13 |  |  |  |  | 39033 | $self->agent( 'WWW::Wikipedia' ); | 
| 101 | 13 |  | 33 |  |  | 839 | bless $self, ref( $class ) || $class; | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 13 |  |  |  |  | 55 | $self->language( $language ); | 
| 104 | 13 |  |  |  |  | 186 | $self->follow_redirects( $follow ); | 
| 105 | 13 |  |  |  |  | 46 | $self->clean_html( $clean_html ); | 
| 106 | 13 |  |  |  |  | 67 | $self->parse_head( 0 ); | 
| 107 | 13 |  |  |  |  | 1122 | return $self; | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | =head2 language() | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | This allows you to get and set the language you want to use. Two letter | 
| 113 |  |  |  |  |  |  | language codes should be used. The default is 'en'. | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new( language => 'es' ); | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | # Later on... | 
| 118 |  |  |  |  |  |  | $wiki->language( 'fr' ); | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | =cut | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | sub language { | 
| 123 | 31 |  |  | 31 | 1 | 2353 | my ( $self, $language ) = @_; | 
| 124 | 31 | 100 |  |  |  | 162 | $self->{ language } = $language if $language; | 
| 125 | 31 |  |  |  |  | 149 | return $self->{ language }; | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | =head2 clean_html() | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | Allows you to get/set whether HTML is stripped out. | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | # set HTML strip | 
| 134 |  |  |  |  |  |  | $wiki->clean_html( 1 ); | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | This option removes all tags and any attributes they might have. | 
| 137 |  |  |  |  |  |  | Their contents, however, are maintained (for now). Comments are | 
| 138 |  |  |  |  |  |  | also removed. | 
| 139 |  |  |  |  |  |  |  | 
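|  |  |  |  |  |  |  | A minimal sketch of the effect, reusing the calls from the SYNOPSIS: | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | my $wiki = WWW::Wikipedia->new( clean_html => 1 ); | 
|  |  |  |  |  |  |  | my $entry = $wiki->search( 'perl' ); | 
|  |  |  |  |  |  |  | ## text comes back with HTML tags and comments stripped | 
|  |  |  |  |  |  |  | print $entry->text() if $entry; | 
|  |  |  |  |  |  |  |  | 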
| 140 |  |  |  |  |  |  | =cut | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | sub clean_html { | 
| 143 | 13 |  |  | 13 | 1 | 28 | my ( $self, $bool ) = @_; | 
| 144 | 13 | 50 |  |  |  | 63 | $self->{ clean_html } = $bool if defined $bool; | 
| 145 | 13 |  |  |  |  | 32 | return $self->{ clean_html }; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | =head2 follow_redirects() | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | By default, Wikipedia redirect directives are followed. Set this to a false | 
| 151 |  |  |  |  |  |  | value to turn that off. | 
| 152 |  |  |  |  |  |  |  | 
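|  |  |  |  |  |  |  | For example: | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | ## stop following redirect directives | 
|  |  |  |  |  |  |  | $wiki->follow_redirects( 0 ); | 
|  |  |  |  |  |  |  |  | 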
| 153 |  |  |  |  |  |  | =cut | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | sub follow_redirects { | 
| 156 | 24 |  |  | 24 | 1 | 670 | my ( $self, $value ) = @_; | 
| 157 | 24 | 100 |  |  |  | 126 | $self->{ follow_redirects } = $value if defined $value; | 
| 158 | 24 |  |  |  |  | 116 | return $self->{ follow_redirects }; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | =head2 search() | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | Performs the search and returns a WWW::Wikipedia::Entry object which | 
| 164 |  |  |  |  |  |  | you can query further. See the WWW::Wikipedia::Entry docs for more info. | 
| 165 |  |  |  |  |  |  |  | 
| 166 |  |  |  |  |  |  | $entry = $wiki->search( 'Perl' ); | 
| 167 |  |  |  |  |  |  | print $entry->text(); | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | If there's a problem connecting to Wikipedia, C<undef> will be returned and the | 
| 170 |  |  |  |  |  |  | error message will be stored in C<error()>. | 
| 171 |  |  |  |  |  |  |  | 
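|  |  |  |  |  |  |  | A short sketch of checking for that failure case: | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | $entry = $wiki->search( 'Perl' ); | 
|  |  |  |  |  |  |  | if ( !defined $entry ) { | 
|  |  |  |  |  |  |  | warn $wiki->error(); | 
|  |  |  |  |  |  |  | } | 
|  |  |  |  |  |  |  |  | 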
| 172 |  |  |  |  |  |  | =cut | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | sub search { | 
| 175 | 11 |  |  | 11 | 1 | 2642 | my ( $self, $string ) = @_; | 
| 176 |  |  |  |  |  |  |  | 
| 177 | 11 |  |  |  |  | 43 | $self->error( undef ); | 
| 178 |  |  |  |  |  |  |  | 
| 179 | 11 | 50 |  |  |  | 39 | croak( "search() requires you pass in a string" ) if !defined( $string ); | 
| 180 |  |  |  |  |  |  |  | 
| 181 | 11 | 100 |  |  |  | 104 | my $enc_string = utf8::is_utf8( $string ) | 
| 182 |  |  |  |  |  |  | ? URI::Escape::uri_escape_utf8( $string ) | 
| 183 |  |  |  |  |  |  | : URI::Escape::uri_escape( $string ); | 
| 184 | 11 |  |  |  |  | 363 | my $src = sprintf( WIKIPEDIA_URL, $self->language(), $enc_string ); | 
| 185 |  |  |  |  |  |  |  | 
| 186 | 11 |  |  |  |  | 73 | my $response = $self->get( $src ); | 
| 187 | 11 | 100 |  |  |  | 4751669 | if ( $response->is_success() ) { | 
| 188 |  |  |  |  |  |  | my $entry = WWW::Wikipedia::Entry->new( $response->decoded_content(), $src, | 
| 189 | 10 |  |  |  |  | 219 | clean_html => $self->{ clean_html } ); | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | # look for a wikipedia style redirect and process if necessary | 
| 192 |  |  |  |  |  |  | # try to catch self-redirects | 
| 193 | 10 | 100 | 100 |  |  | 60 | return $self->search( $1 ) | 
|  |  |  | 66 |  |  |  |  | 
| 194 |  |  |  |  |  |  | if $self->follow_redirects && $entry->raw() =~ /^#REDIRECT\s*\[\[([^|\]]+)/is && $1 ne $string; | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 9 |  |  |  |  | 8848 | return ( $entry ); | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  | else { | 
| 199 | 1 |  |  |  |  | 12 | $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src ); | 
| 200 | 1 |  |  |  |  | 12 | return undef; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | } | 
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | =head2 random() | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | This method fetches a random Wikipedia page and returns it as a WWW::Wikipedia::Entry object. | 
| 208 |  |  |  |  |  |  |  | 
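|  |  |  |  |  |  |  | A minimal sketch, reusing the C<text()> accessor from the SYNOPSIS: | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | my $entry = $wiki->random(); | 
|  |  |  |  |  |  |  | print $entry->text() if $entry; | 
|  |  |  |  |  |  |  |  | 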
| 209 |  |  |  |  |  |  | =cut | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | sub random { | 
| 212 | 1 |  |  | 1 | 1 | 438 | my ( $self ) = @_; | 
| 213 | 1 |  |  |  |  | 3 | my $src = sprintf( WIKIPEDIA_RAND_URL, $self->language() ); | 
| 214 | 1 |  |  |  |  | 10 | my $response = $self->get( $src ); | 
| 215 |  |  |  |  |  |  |  | 
| 216 | 1 | 50 |  |  |  | 1302516 | if ( $response->is_success() ) { | 
| 217 |  |  |  |  |  |  | # get the raw version of the current url | 
| 218 | 1 |  |  |  |  | 27 | my( $title ) = $response->request->uri =~ m{\.org/wiki/(.+)$}; | 
| 219 | 1 |  |  |  |  | 46 | $src      = sprintf( WIKIPEDIA_URL, $self->language(), $title ); | 
| 220 | 1 |  |  |  |  | 7 | $response = $self->get( $src ); | 
| 221 |  |  |  |  |  |  | return WWW::Wikipedia::Entry->new( $response->decoded_content(), $src, | 
| 222 | 1 |  |  |  |  | 250919 | clean_html => $self->{ clean_html } ); | 
| 223 |  |  |  |  |  |  | } | 
| 224 |  |  |  |  |  |  |  | 
| 225 | 0 |  |  |  |  | 0 | $self->error( "uhoh, WWW::Wikipedia unable to contact " . $src ); | 
| 226 | 0 |  |  |  |  | 0 | return; | 
| 227 |  |  |  |  |  |  | } | 
| 228 |  |  |  |  |  |  |  | 
| 229 |  |  |  |  |  |  | =head2 error() | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | This is a generic error accessor/mutator. You can retrieve any search error | 
| 232 |  |  |  |  |  |  | messages here. | 
| 233 |  |  |  |  |  |  |  | 
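|  |  |  |  |  |  |  | For example, after a failed call: | 
|  |  |  |  |  |  |  |  | 
|  |  |  |  |  |  |  | warn $wiki->error() if defined $wiki->error(); | 
|  |  |  |  |  |  |  |  | 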
| 234 |  |  |  |  |  |  | =cut | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | sub error { | 
| 237 | 13 |  |  | 13 | 1 | 414 | my $self = shift; | 
| 238 |  |  |  |  |  |  |  | 
| 239 | 13 | 100 |  |  |  | 44 | if ( @_ ) { | 
| 240 | 12 |  |  |  |  | 25 | $self->{ _ERROR } = shift; | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 13 |  |  |  |  | 31 | return $self->{ _ERROR }; | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | =head1 TODO | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | =over 4 | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | =item * | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | Be more specific about the HTML cleaning methodology. For now all tags are | 
| 253 |  |  |  |  |  |  | removed, keeping only their contents. In the future the behaviour might change | 
| 254 |  |  |  |  |  |  | according to each specific tag. | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | =item * | 
| 257 |  |  |  |  |  |  |  | 
| 258 |  |  |  |  |  |  | Watch the development of Special:Export XML formatting, eg: | 
| 259 |  |  |  |  |  |  | http://en.wikipedia.org/wiki/Special:Export/perl | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | =back | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | =over 4 | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | =item * LWP::UserAgent | 
| 268 |  |  |  |  |  |  |  | 
| 269 |  |  |  |  |  |  | =back | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | =head1 REPOSITORY | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | L | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | =head1 AUTHORS | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | Ed Summers E<lt>ehs@pobox.comE<gt> | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | Brian Cassidy E<lt>bricas@cpan.orgE<gt> | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | Copyright 2003-2015 by Ed Summers | 
| 284 |  |  |  |  |  |  |  | 
| 285 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 286 |  |  |  |  |  |  | it under the same terms as Perl itself. | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | =cut | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | 1; |