File Coverage

blib/lib/MediaWiki/API.pm
Criterion Covered Total %
statement 24 230 10.4
branch 0 112 0.0
condition 0 36 0.0
subroutine 8 24 33.3
pod 9 9 100.0
total 41 411 9.9


line stmt bran cond sub pod time code
1             package MediaWiki::API;
2              
3 1     1   66412 use warnings;
  1         2  
  1         33  
4 1     1   5 use strict;
  1         2  
  1         19  
5              
6             # our required modules
7              
8 1     1   686 use LWP::UserAgent;
  1         45951  
  1         72  
9 1     1   9 use URI::Escape;
  1         2  
  1         56  
10 1     1   546 use Encode;
  1         14439  
  1         93  
11 1     1   682 use JSON;
  1         10107  
  1         6  
12 1     1   113 use Carp;
  1         2  
  1         74  
13              
14             # just for debugging the module
15             # use Data::Dumper;
16             # use Devel::Peek;
17              
18             use constant {
19 1         2975 ERR_NO_ERROR => 0,
20             ERR_CONFIG => 1,
21             ERR_HTTP => 2,
22             ERR_API => 3,
23             ERR_LOGIN => 4,
24             ERR_EDIT => 5,
25             ERR_PARAMS => 6,
26             ERR_UPLOAD => 7,
27             ERR_DOWNLOAD => 8,
28              
29             DEF_RETRIES => 0,
30             DEF_RETRY_DELAY => 0,
31              
32             DEF_MAX_LAG => undef,
33             DEF_MAX_LAG_RETRIES => 4,
34             DEF_MAX_LAG_DELAY => 5,
35              
36             USE_HTTP_GET => 0
37 1     1   7 };
  1         2  
38              
39             =head1 NAME
40              
41             MediaWiki::API - Provides a Perl interface to the MediaWiki API (https://www.mediawiki.org/wiki/API)
42              
43             =head1 VERSION
44              
45             Version 0.51
46              
47             =cut
48              
49             our $VERSION = "0.51";
50              
51             =head1 SYNOPSIS
52              
53             This module provides an interface between Perl and the MediaWiki API (https://www.mediawiki.org/wiki/API) allowing creation of scripts to automate editing and extraction of data from MediaWiki driven sites like Wikipedia.
54              
55             use MediaWiki::API;
56              
57             my $mw = MediaWiki::API->new();
58             $mw->{config}->{api_url} = 'https://en.wikipedia.org/w/api.php';
59              
60             # log in to the wiki
61             $mw->login( { lgname => 'username', lgpassword => 'password' } )
62             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
63              
64             # get a list of articles in category
65             my $articles = $mw->list ( {
66             action => 'query',
67             list => 'categorymembers',
68             cmtitle => 'Category:Perl',
69             cmlimit => 'max' } )
70             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
71              
72             # and print the article titles
73             foreach (@{$articles}) {
74             print "$_->{title}\n";
75             }
76              
77             # get user info
78             my $userinfo = $mw->api( {
79             action => 'query',
80             meta => 'userinfo',
81             uiprop => 'blockinfo|hasmsg|groups|rights|options|editcount|ratelimits' } );
82              
83             ...
84              
85             =head1 FUNCTIONS
86              
87             =head2 MediaWiki::API->new( $config_hashref )
88              
89             Returns a MediaWiki API object. You can pass a config as a hashref when calling new, or set the configuration later. When creating a new object, defaults for max lag and retries are set.
90              
91             my $mw = MediaWiki::API->new( { api_url => 'https://en.wikipedia.org/w/api.php' } );
92              
93             Configuration options are
94              
95             =over
96              
97             =item * api_url = 'Path to mediawiki api.php';
98              
99             =item * files_url = 'Base url for files'; (needed if the api returns a relative URL for images like /images/a/picture.jpg)
100              
101             =item * upload_url = 'https://en.wikipedia.org/wiki/Special:Upload'; (path to the upload special page which is required if you want to upload images)
102              
103             =item * on_error = Function reference to call if an error occurs in the module.
104              
105             =item * use_http_get = Boolean 0 or 1 (defaults to 0). If set to 1, the perl module will use http GET method for accessing the api. By default it uses the POST method. Note that the module will still use POST for the api calls that require POST no matter what the value of this configuration option. Currently the following actions will work with GET: query, logout, paraminfo - see get_actions configuration below.
106              
107             =item * get_actions = Hashref (defaults to { 'query' => 1, 'logout' => 1, 'paraminfo' => 1 } ). This contains the API actions that are supported by the http GET method if it is enabled. Some wikis may have extensions that add more functions that work with an http GET request. If so, you can add actions as needed.
108              
109             =item * retries = Integer value; The number of retries to send an API request if an http error or JSON decoding error occurs. Defaults to 0 (try only once - don't retry). If retries is set to 4, and the wiki is down, the error won't be reported until after the 5th connection attempt.
110              
111             =item * retry_delay = Integer value in seconds; The amount of time to wait before retrying a request if an HTTP error or JSON decoding error occurs.
112              
113             =item * max_lag = Integer value in seconds; Wikipedia runs on a database cluster and as such high edit rates cause the slave servers to lag. If this config option is set then if the lag is more than the value of max_lag, the api will wait before retrying the request. 5 is a recommended value. More information about this subject can be found at https://www.mediawiki.org/wiki/Manual:Maxlag_parameter. Note that the config option includes an underscore to match the naming scheme of the other configuration options.
114              
115             =item * max_lag_delay = Integer value in seconds; This configuration option specifies the delay to wait before retrying a request when the server has reported a lag more than the value of max_lag. This defaults to 5 if using the max_lag configuration option.
116              
117             =item * max_lag_retries = Integer value; The number of retries to send an API request if the server has reported a lag more than the value of max_lag. If the maximum retries is reached, an error is returned. Setting this to a negative value like -1 will mean the request is resent until the server's lag is below the max_lag threshold or another error occurs. Defaults to 4.
118              
119             =item * no_proxy = Boolean; Set to 1 to disable use of any proxy set in the environment. Note that by default, if you have proxy environment variables set, then the module will attempt to use them. This feature was added at version 0.29. Versions below this ignore any proxy settings, but you can set this yourself by doing MediaWiki::API->{ua}->env_proxy() after creating a new instance of the API class. More information about env_proxy can be found at http://search.cpan.org/~gaas/libwww-perl-5.834/lib/LWP/UserAgent.pm#Proxy_attributes
120              
121             =back
122              
123             An example for the on_error configuration could be something like:
124              
125             $mw->{config}->{on_error} = \&on_error;
126              
127             sub on_error {
128             print "Error code: " . $mw->{error}->{code} . "\n";
129             print $mw->{error}->{stacktrace}."\n";
130             die;
131             }
132              
133             Errors are stored in $mw->{error}->{code} with more information in $mw->{error}->{details}. $mw->{error}->{stacktrace} includes
134             the details and a stacktrace to locate where any problems originated from (in some code which uses this module for example).
135              
136             The error codes are as follows
137              
138             =over
139              
140             =item * ERR_NO_ERROR = 0 (No error)
141              
142             =item * ERR_CONFIG = 1 (An error with the configuration)
143              
144             =item * ERR_HTTP = 2 (An http related connection error)
145              
146             =item * ERR_API = 3 (An error returned by the MediaWiki API)
147              
148             =item * ERR_LOGIN = 4 (An error logging in to the MediaWiki)
149              
150             =item * ERR_EDIT = 5 (An error with an editing function)
151              
152             =item * ERR_PARAMS = 6 (An error with parameters passed to a helper function)
153              
154             =item * ERR_UPLOAD = 7 (An error with the file upload facility)
155              
156             =item * ERR_DOWNLOAD = 8 (An error with downloading a file)
157              
158             =back
159              
160             Other useful parameters and objects in the MediaWiki::API object are
161              
162             =over
163              
164             =item * MediaWiki::API->{ua} = The LWP::UserAgent object. You could modify this to get or modify the cookies (MediaWiki::API->{ua}->cookie_jar) or to change the UserAgent string sent by this perl module (MediaWiki::API->{ua}->agent)
165              
166             =item * MediaWiki::API->{response} = the last response object returned by the LWP::UserAgent after an API request.
167              
168             =back
169              
170             =cut
171              
172             sub new {
173              
174 0     0 1   my ($class, $config) = @_;
175            
176             # if no config passed make a new hash reference and get the default configuration parameters
177 0 0         $config = {} if ! defined $config;
178 0           my $defconfig = _get_config_defaults();
179              
180 0           $config = {%$defconfig, %$config};
181              
182 0           my $self = { config => $config };
183              
184 0           my $ua = LWP::UserAgent->new();
185 0           $ua->cookie_jar({});
186 0           $ua->agent(__PACKAGE__ . "/$VERSION");
187 0           $ua->default_header("Accept-Encoding" => "gzip, deflate");
188 0 0         $ua->env_proxy() unless ($config->{no_proxy});
189              
190 0           $self->{ua} = $ua;
191              
192 0           my $json = JSON->new->utf8(1);
193 0           $self->{json} = $json;
194              
195             # initialise error code values
196 0           $self->{error}->{code} = 0;
197 0           $self->{error}->{details} = '';
198 0           $self->{error}->{stacktrace} = '';
199              
200 0           bless ($self, $class);
201 0           return $self;
202             }
203              
204             # returns a hashref with configuration defaults
205             sub _get_config_defaults {
206 0     0     my %config;
207              
208 0           $config{retries} = DEF_RETRIES;
209 0           $config{retry_delay} = DEF_RETRY_DELAY;
210              
211 0           $config{max_lag} = DEF_MAX_LAG;
212 0           $config{max_lag_retries} = DEF_MAX_LAG_RETRIES;
213 0           $config{max_lag_delay} = DEF_MAX_LAG_DELAY;
214            
215 0           $config{use_http_get} = USE_HTTP_GET;
216            
217             $config{get_actions} = {
218 0           'query' => 1,
219             'logout' => 1,
220             'paraminfo' => 1
221             };
222              
223 0           return \%config;
224             }
225              
226             =head2 MediaWiki::API->login( $query_hashref )
227              
228             Logs in to a MediaWiki. Parameters are those used by the MediaWiki API (https://www.mediawiki.org/wiki/API:Login). Returns a hashref with some login details, or undef on login failure. If MediaWiki requests a LoginToken, the login is attempted again, but with the token sent from the initial login. Errors are stored in MediaWiki::API->{error}->{code} and MediaWiki::API->{error}->{details}.
229              
230             my $mw = MediaWiki::API->new( { api_url => 'https://en.wikipedia.org/w/api.php' } );
231              
232             #log in to the wiki
233             $mw->login( {lgname => 'username', lgpassword => 'password' } )
234             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
235              
236             =cut
237              
238             sub login {
239 0     0 1   my ($self, $query) = @_;
240 0           $query->{action} = 'login';
241             # attempt to login, and return undef if there was an api failure
242 0 0         return undef unless ( my $ref = $self->edit( $query ) );
243              
244             # reassign hash reference to the login section
245 0           my $login = $ref->{login};
246              
247             # return error if the login was not successful
248             return $self->_error( ERR_LOGIN, 'Login Failure - ' . $login->{result} )
249 0 0         unless ( $login->{result} eq 'Success' );
250            
251             # everything was ok so return the reference
252 0           return $login;
253             }
254              
255             =head2 MediaWiki::API->api( $query_hashref, $options_hashref )
256              
257             Call the MediaWiki API interface. Parameters are passed as a hashref which are described on the MediaWiki API page (https://www.mediawiki.org/wiki/API). Returns a hashref with the results of the call or undef on failure with the error code and details stored in MediaWiki::API->{error}->{code} and MediaWiki::API->{error}->{details}. MediaWiki::API uses the LWP::UserAgent module to send the http requests to the MediaWiki API. After any API call, the response object returned by LWP::UserAgent is available in $mw->{response}. Note that this function sets the format key (and the maxlag key, if the max_lag configuration option is defined) on the input query_hashref.
258              
259             binmode STDOUT, ':utf8';
260              
261             # get the name of the site
262             if ( my $ref = $mw->api( { action => 'query', meta => 'siteinfo' } ) ) {
263             print $ref->{query}->{general}->{sitename};
264             }
265              
266             # list of titles for "Albert Einstein" in different languages.
267             my $titles = $mw->api( {
268             action => 'query',
269             titles => 'Albert Einstein',
270             prop => 'langlinks',
271             lllimit => 'max' } )
272             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
273              
274             my ($pageid,$langlinks) = each ( %{ $titles->{query}->{pages} } );
275              
276             foreach ( @{ $langlinks->{langlinks} } ) {
277             print "$_->{'*'}\n";
278             }
279              
280             MediaWiki's API uses UTF-8 and any 8 bit character string parameters are encoded automatically by the API call. If your parameters are already in UTF-8 this will be detected and the encoding will be skipped. If your parameters for some reason contain UTF-8 data but no UTF-8 flag is set (i.e. you did not use the "use utf8;" pragma) you should prevent re-encoding by passing an option skip_encoding => 1 in the $options_hash. For example:
281              
282             my $mw = MediaWiki::API->new();
283             $mw->{config}->{api_url} = 'https://fr.wiktionary.org/w/api.php';
284              
285             my $query = {action => 'query',
286             list => 'categorymembers',
287             cmlimit => 'max'};
288              
289             $query->{cmtitle} ="Cat\x{e9}gorie:moyen_fran\x{e7}ais"; # latin1 string
290             $mw->list ( $query ); # ok
291              
292             $query->{cmtitle} = "Cat". pack("U", 0xe9)."gorie:moyen_fran".pack("U",0xe7)."ais"; # unicode string
293             $mw->list ( $query ); # ok
294              
295             $query->{cmtitle} ="Cat\x{c3}\x{a9}gorie:moyen_fran\x{c3}\x{a7}ais"; # unicode data without utf-8 flag
296             # $mw->list ( $query ); # NOT OK
297             $mw->list ( $query, {skip_encoding => 1} ); # ok
298              
299             If you are calling an API function which requires a file upload, e.g. import or upload, specify the file to upload as an arrayref containing the local filename. The API may return a warning, for example to say the file is a duplicate. To ignore warnings and force an upload, use ignorewarnings => 1. All the parameters as with everything else can be found on the MediaWiki API page.
300              
301             $mw->api( {
302             action => 'import',
303             xml => ['wiki_dump.xml']
304             } );
305              
306             $mw->api( {
307             action => 'upload',
308             filename => 'test.png',
309             comment => 'a test image',
310             file => ['test.png'],
311             } );
312              
313             You can also give the data to be uploaded directly, should you want to read the data in yourself. In this case, supply an arrayref with three parameters, starting with an "undef", followed by the filename, and then a Content => $data pair containing the data.
314              
315             $mw->api( {
316             action => 'import',
317             xml => [ undef, 'wiki_dump.xml', Content => $data ]
318             } );
319              
320             $mw->api( {
321             action => 'upload',
322             filename => 'test.png',
323             comment => 'a test image',
324             file => [ undef, 'test.png', Content => $data ],
325             } );
326              
327             =cut
328              
329             sub api {
330 0     0 1   my ($self, $query, $options) = @_;
331              
332             return $self->_error(ERR_CONFIG, "You need to give the URL to the mediawiki API php.")
333 0 0         unless $self->{config}->{api_url};
334              
335 0           my $retries = $self->{config}->{retries};
336 0           my $maxlagretries = 1;
337              
338 0           $self->_encode_hashref_utf8($query, $options->{skip_encoding});
339 0 0         $query->{maxlag} = $self->{config}->{max_lag} if defined $self->{config}->{max_lag};
340 0           $query->{format}='json';
341              
342             # if the config is set to use GET we need to construct a querystring. some actions are "POST" only -
343             # edit, move, action = rollback, action = undelete, action =
344 0           my $querystring = '';
345 0 0 0       if ( $self->{config}->{use_http_get} && $self->{config}->{get_actions}->{$query->{action}} ) {
346 0           $querystring = _make_querystring( $query );
347             }
348              
349 0           my $ref;
350 0           while (1) {
351              
352             # connection retry loop.
353 0           foreach my $try (0 .. $retries) {
354              
355             # if we are already retrying, then wait the specified delay
356 0 0         if ( $try > 0 ) {
357 0           sleep $self->{config}->{retry_delay};
358             }
359              
360 0           my $response;
361             my %headers;
362             # if we are using the get method ($querystring is set above)
363 0 0         if ( $querystring ) {
364 0           $response = $self->{ua}->get( $self->{config}->{api_url} . $querystring, %headers );
365             } else {
366 0 0 0       $headers{'content-type'} = 'form-data' if $query->{action} eq 'upload' || $query->{action} eq 'import';
367 0           $response = $self->{ua}->post( $self->{config}->{api_url}, $query, %headers );
368             }
369 0           $self->{response} = $response;
370            
371             # if the request was successful then check the returned content and decode.
372 0 0         if ( $response->is_success ) {
373            
374 0           my $decontent = $response->decoded_content( charset => 'none' );
375              
376 0 0         if ( ! defined $decontent ) {
377 0 0         return $self->_error(ERR_HTTP,"Unable to decode content returned by $self->{config}->{api_url} - Unknown content encoding?")
378             if ( $try == $retries );
379 0           next;
380             }
381            
382 0 0         if ( length $decontent == 0 ) {
383 0 0         return $self->_error(ERR_HTTP,"$self->{config}->{api_url} returned a zero length string")
384             if ( $try == $retries );
385 0           next;
386             }
387              
388             # decode the json trapping any errors
389 0           eval {
390 0           $ref = $self->{json}->decode($decontent);
391             };
392              
393 0 0         if ( $@) {
394             # an error occurred, so we check if we need to retry and continue
395 0           my $error = $@;
396 0 0         return $self->_error(ERR_HTTP,"Failed to decode JSON returned by $self->{config}->{api_url}\nDecoding Error:\n$error\nReturned Data:\n$decontent")
397             if ( $try == $retries );
398 0           next;
399             } else {
400             # no error so we want out of the retry loop
401 0           last;
402             }
403              
404             # if the request was not successful then we retry or return a failure if the maximum retries
405             # have been reached, otherwise we try again
406             } else {
407 0 0         return $self->_error(ERR_HTTP, $response->status_line . " : error occurred when accessing $self->{config}->{api_url} after " . ($try+1) . " attempt(s)")
408             if ( $try == $retries );
409 0           next;
410             }
411            
412             }
413              
414 0 0 0       return $self->_error(ERR_API,"API has returned an empty array reference. Please check your parameters") if ( ref($ref) eq 'ARRAY' && scalar @{$ref} == 0);
  0            
415              
416             # check lag and wait
417 0 0 0       if (ref($ref) eq 'HASH' && exists $ref->{error} && $ref->{error}->{code} eq 'maxlag' ) {
      0        
418 0 0         if ($maxlagretries == $self->{config}->{max_lag_retries}) {
419 0           return $self->_error(ERR_API,"Server has reported lag above the configured max_lag value of " . $self->{config}->{max_lag} . " value after " . $self->{config}->{max_lag_retries} . " attempt(s). Last reported lag was - ". $ref->{'error'}->{'info'})
420             } else {
421 0           sleep $self->{config}->{max_lag_delay};
422 0 0         $maxlagretries++ if $maxlagretries < $self->{config}->{max_lag_retries};
423             # redo the request
424 0           next;
425             }
426              
427             }
428              
429             # if we got this far, then we have a hashref from the api and we want out of the while loop
430 0           last;
431              
432             }
433              
434 0 0 0       return $self->_error(ERR_API,$ref->{error}->{code} . ": " . $ref->{error}->{info} ) if ( ref($ref) eq 'HASH' && exists $ref->{error} );
435              
436 0           return $ref;
437             }
438              
439             =head2 MediaWiki::API->logout()
440              
441             Log the current user out and clear associated cookies and edit tokens.
442              
443             =cut
444              
445             sub logout {
446 0     0 1   my ($self) = @_;
447             # clear login cookies
448 0           $self->{ua}->{cookie_jar} = undef;
449             # clear cached tokens
450 0           $self->{config}->{tokens} = undef;
451             }
452              
453             =head2 MediaWiki::API->edit( $query_hashref, $options_hashref )
454              
455             A helper function for doing edits using the MediaWiki API. Parameters are passed as a hashref which are described on the MediaWiki API editing page (https://www.mediawiki.org/wiki/API:Changing_wiki_content). Note that you need $wgEnableWriteAPI = true in your LocalSettings.php to use these features. This function will modify the input hashref.
456              
457             Currently
458              
459             =over
460              
461             =item * Create/Edit pages (Mediawiki >= 1.13 )
462              
463             =item * Move pages (Mediawiki >= 1.12 )
464              
465             =item * Rollback (Mediawiki >= 1.12 )
466              
467             =item * Delete pages (Mediawiki >= 1.12 )
468              
469             =item * Upload images (Mediawiki >= 1.16 )
470              
471             =item * Import pages (Mediawiki >= 1.15 )
472              
473             =item * (Un)protect pages (Mediawiki >= 1.12 )
474              
475             =item * (Un)block users (Mediawiki >= 1.12 )
476              
477             =item * (Un)watch a page (Mediawiki >= 1.18 )
478              
479             =item * Email user (Mediawiki >= 1.14 )
480              
481             =item * Patrol changes (Mediawiki >= 1.14 )
482              
483             =back
484              
485             are supported via this call. Use this call to edit pages without having to worry about getting an edit token from the API first. The function will cache edit tokens to speed up future edits.
486              
487             Returns a hashref with the results of the call or undef on failure with the error code and details stored in MediaWiki::API->{error}->{code} and MediaWiki::API->{error}->{details}.
488              
489             The options hashref currently has one optional parameter (skip_encoding => 1). This is described above in the MediaWiki::API->api call documentation.
490              
491             Here are some example snippets of code. The first example is for adding some text to an existing page (if the page doesn't exist nothing will happen). Note that the timestamp for the revision we are changing is saved. This allows us to avoid edit conflicts. The value is passed back to the edit function, and if someone had edited the page in the meantime, an error will be returned.
492              
493             my $pagename = "Wikipedia:Sandbox";
494             my $ref = $mw->get_page( { title => $pagename } );
495             unless ( $ref->{missing} ) {
496             my $timestamp = $ref->{timestamp};
497             $mw->edit( {
498             action => 'edit',
499             title => $pagename,
500             basetimestamp => $timestamp, # to avoid edit conflicts
501             text => $ref->{'*'} . "\nAdditional text" } )
502             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
503             }
504              
505             The following code deletes a page with the name "DeleteMe". You can specify a reason for the deletion, otherwise
506             a generated reason will be used.
507              
508             # delete a page
509             $mw->edit( {
510             action => 'delete', title => 'DeleteMe', reason => 'no longer needed' } )
511             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
512              
513             This code moves a page from MoveMe to MoveMe2.
514              
515             # move a page
516             $mw->edit( {
517             action => 'move', from => 'MoveMe', to => 'MoveMe2' } )
518             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
519              
520             The following snippet rolls back one or more edits from user MrVandal. If the user is not the last editor of the page, an error will be returned. If no user is passed, the edits for whoever last changed the page will be rolled back.
521              
522             $mw->edit( {
523             action => 'rollback', title => 'Sandbox', user => 'MrVandal' } )
524             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
525              
526             =cut
527              
528             sub edit {
529 0     0 1   my ($self, $query, $options) = @_;
530              
531             # choose token type and parameter name depending on action
532             # token types are createaccount, csrf, login, patrol, rollback, userrights, watch
533 0           my $action = $query->{action};
534 0           my $token_type = 'csrf';
535 0           my $token_param = 'token';
536              
537 0 0         if ( $action eq 'createaccount' ) {
    0          
    0          
    0          
    0          
538 0           $token_type = 'createaccount';
539 0           $token_param = 'createtoken';
540             } elsif ( $action eq 'login' ) {
541 0           $token_type = 'login';
542 0           $token_param = 'lgtoken';
543             } elsif ( $action eq 'patrol' ) {
544 0           $token_type = 'patrol';
545             } elsif ( $action eq 'rollback' ) {
546 0           $token_type = 'rollback';
547             } elsif ( $action eq 'userrights' ) {
548 0           $token_type = 'userrights';
549             }
550              
551             # get a token
552 0 0         return undef unless ( my $token = $self->_get_token( $token_type ) );
553              
554             # set the token
555 0           $query->{$token_param} = $token;
556              
557             # do the edit
558 0 0         return undef unless ( my $ref = $self->api( $query, $options ) );
559              
560 0           return $ref;
561             }
562              
563              
564             =head2 MediaWiki::API->get_page( $params_hashref )
565              
566             A helper function for getting the most recent page contents (and other metadata) for a page. It calls the lower level api function with a revisions query to get the most recent revision.
567              
568             # get some page contents
569             my $page = $mw->get_page( { title => 'Main Page' } );
570             # print page contents
571             print $page->{'*'};
572              
573             Returns a hashref with the following keys or undef on an error. If the page is missing then the returned hashref will contain only ns, title and a key called "missing".
574              
575             =over
576              
577             =item * '*' - contents of page
578              
579             =item * 'pageid' - page id of page
580              
581             =item * 'revid' - revision id of page
582              
583             =item * 'timestamp' - timestamp of revision
584              
585             =item * 'user' - user who made revision
586              
587             =item * 'title' - the title of the page
588              
589             =item * 'ns' - the namespace the page is in
590              
591             =item * 'size' - size of page in bytes
592              
593             =back
594              
595             Full information about these can be read on (https://www.mediawiki.org/wiki/API:Query_-_Properties#revisions_.2F_rv)
596              
597             =cut
598              
599             sub get_page {
600 0     0 1   my ($self, $params) = @_;
601 0 0         return undef unless ( my $ref = $self->api( { action => 'query', prop => 'revisions', titles => $params->{title}, rvprop => 'ids|flags|timestamp|user|comment|size|content' } ) );
602             # get the page id and the page hashref with title and revisions
603 0           my ($pageid, $pageref) = each %{ $ref->{query}->{pages} };
  0            
604             # get the first revision
605 0           my $rev = @{ $pageref->{revisions } }[0];
  0            
606             # delete the revision from the hashref
607 0           delete($pageref->{revisions});
608             # if the page is missing then return the pageref
609 0 0         return $pageref if ( defined $pageref->{missing} );
610             # combine the pageid, the latest revision and the page title into one hash
611 0           return { 'pageid'=>$pageid, %{ $rev }, %{ $pageref } };
  0            
  0            
612             }
613              
614             =head2 MediaWiki::API->list( $query_hashref, $options_hashref )
615              
616             A helper function for getting lists using the MediaWiki API. Parameters are passed as a hashref and are described on the MediaWiki API lists page (https://www.mediawiki.org/wiki/API:Query_-_Lists). Note that this function modifies the input query_hashref.
617              
618             This function will return a reference to an array of hashes or undef on failure. If a hook function is supplied, the results are passed to the hook instead and 1 is returned on success. It handles getting lists of data from the MediaWiki api, continuing the request with another connection if needed. The options_hashref currently has three parameters:
619              
620             =over
621              
622             =item * max => value
623              
624             =item * hook => \&function_hook
625              
626             =item * skip_encoding => 1
627              
628             =back
629              
630             The value of max specifies the maximum "queries" which will be used to pull data out. For example the default limit per query is 10 items, but this can be raised to 500 for normal users and higher for sysops and bots. If the limit is raised to 500 and max was set to 2, a maximum of 1000 results would be returned.
631              
632             If you wish to process large lists, for example the articles in a large category, you can pass a hook function, which will be passed a reference to an array of results for each query connection.
633              
634             The skip_encoding parameter works as described above in the MediaWiki::API->api call documentation.
635              
636             binmode STDOUT, ':utf8';
637              
638             # process the first 400 articles in the main namespace in the category "Surnames".
639             # get 100 at a time, with a max of 4 and pass each 100 to our hook.
640             $mw->list ( { action => 'query',
641             list => 'categorymembers',
642             cmtitle => 'Category:Surnames',
643             cmnamespace => 0,
644             cmlimit=>'100' },
645             { max => 4, hook => \&print_articles } )
646             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
647              
648             # print the name of each article
649             sub print_articles {
650             my ($ref) = @_;
651             foreach (@$ref) {
652             print "$_->{title}\n";
653             }
654             }
655              
656             =cut
657              
658             sub list {
659 0     0 1   my ($self, $query, $options) = @_;
660 0           my ($ref, @results);
661 0           my ($cont_key, $cont_value, $array_key);
662              
663 0           my $list = $query->{list};
664              
665 0 0         $options->{max} = 0 if ( !defined $options->{max} );
666              
667 0           $query->{'rawcontinue'} = '';
668              
669 0           my $continue = 0;
670 0           my $count = 0;
671             do {
672 0 0         return undef unless ( $ref = $self->api( $query, $options ) );
673              
674             # return (empty) arrayref if there are no results
675 0 0         return \@results unless ( $ref->{query}->{$list} );
676              
677             # check if there are more results to be had
678 0 0         if ( exists( $ref->{'query-continue'} ) ) {
679             # get query-continue hashref and extract key and value (key will be used as from parameter to continue where we left off)
680 0           ($cont_key, $cont_value) = each( %{ $ref->{'query-continue'}->{$list} } );
  0            
681 0           $query->{$cont_key} = $cont_value;
682 0           $continue = 1;
683             } else {
684 0           $continue = 0;
685             }
686              
687 0 0         if ( defined $options->{hook} ) {
688 0           $options->{hook}( $ref->{query}->{$list} );
689             } else {
690 0           push @results, @{ $ref->{query}->{$list} };
  0            
691             }
692              
693 0           $count += 1;
694              
695 0   0       } until ( ! $continue || $count >= $options->{max} && $options->{max} != 0 );
      0        
696              
697 0 0         return 1 if ( defined $options->{hook} );
698 0           return \@results;
699              
700             }
701              
702             =head2 MediaWiki::API->upload( $params_hashref )
703              
704             This function is deprecated. For uploading on MediaWiki versions 1.16 or later, it is recommended that you use MediaWiki::API->edit or MediaWiki::API->api directly, which have much better
705             error handling and support uploading files by just passing a filename.
706              
707             A function to upload files to a MediaWiki. For MediaWiki versions older than 1.16 this function does not use the MediaWiki API, as file uploading through the API is not available there. Instead it uploads using the Special:Upload page, and as such an additional configuration value is needed.
708              
709             my $mw = MediaWiki::API->new( {
710             api_url => 'https://en.wikipedia.org/w/api.php' } );
711             # configure the special upload location.
712             $mw->{config}->{upload_url} = 'https://en.wikipedia.org/wiki/Special:Upload';
713              
714             The upload function is then called as follows
715              
716             # upload a file to MediaWiki
717             open FILE, "myfile.jpg" or die $!;
718             binmode FILE;
719             my ($buffer, $data);
720             while ( read(FILE, $buffer, 65536) ) {
721             $data .= $buffer;
722             }
723             close(FILE);
724              
725             $mw->upload( { title => 'file.jpg',
726             summary => 'This is the summary to go on the Image:file.jpg page',
727             data => $data } ) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
728              
729             Error checking is limited. Also note that the module will force a file upload, ignoring any warning for file size or overwriting an old file.
730              
731             =cut
732              
733             sub upload {
734 0     0 1   my ($self, $params) = @_;
735              
736             # get the version of mediawiki running, and if less than 1.16 use the old upload mechanism
737 0           my $mwver = $self->_get_version;
738 0           $mwver =~ /(\d+)\.(\d+)/;
739 0 0 0       if ( $1 == 1 && $2 < 16 ) {
740 0           return $self->_upload_old($params);
741             }
742              
743 0           my $query;
744 0           $query->{action} = 'upload';
745 0           $query->{filename} = $params->{title};
746 0           $query->{comment} = $params->{summary};
747 0           $query->{file} = [ undef, $params->{title}, Content => $params->{data} ];
748 0           $query->{ignorewarnings} = 1;
749 0           return $self->edit($query);
750             }
751              
752             sub _upload_old {
753 0     0     my ($self, $params) = @_;
754              
755 0 0         return $self->_error(ERR_CONFIG,"You need to give the URL to the mediawiki Special:Upload page.") unless $self->{config}->{upload_url};
756              
757             my $response = $self->{ua}->post(
758             $self->{config}->{upload_url},
759             Content_Type => 'multipart/form-data',
760             Content => [
761             wpUploadFile => [ undef, $params->{title}, Content => $params->{data} ],
762             wpSourceType => 'file',
763             wpDestFile => $params->{title},
764             wpUploadDescription => $params->{summary},
765 0           wpUpload => 'Upload file',
766             wpIgnoreWarning => 'true', ]
767             );
768              
769 0 0         return $self->_error(ERR_UPLOAD,"There was a problem uploading the file - $params->{title}") unless ( $response->code == 302 );
770 0           return 1;
771             }
772              
773             =head2 MediaWiki::API->download( $params_hashref )
774              
775             A function to download images/files from a MediaWiki. A file url may need to be configured if the api returns a relative URL.
776              
777             my $mw = MediaWiki::API->new( {
778             api_url => 'https://www.exotica.org.uk/mediawiki/api.php' } );
779             # configure the file url. Wikipedia doesn't need this but the ExoticA wiki does.
780             $mw->{config}->{files_url} = 'https://www.exotica.org.uk';
781              
782             The download function is then called as follows
783              
784             my $file = $mw->download( { title => 'Image:Mythic-Beasts_Logo.png'} )
785             || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
786              
787             If the file does not exist on the wiki, an empty string is returned. If the file cannot be downloaded, undef is returned.
788              
789             =cut
790              
791             sub download {
792 0     0 1   my ($self, $params) = @_;
793              
794             return $self->_error(ERR_PARAMS,"You need to give a name for the Image page") unless
795 0 0         ( defined $params->{title} );
796              
797             return undef unless my $ref = $self->api(
798             { action => 'query',
799             titles => $params->{title},
800 0 0         prop => 'imageinfo',
801             iiprop => 'url' } );
802              
803             # get the page id and the page hashref with title and imageinfo
804 0           my ( $pageid, $pageref ) = each %{ $ref->{query}->{pages} };
  0            
805              
806             # if the image is missing then return an empty string
807 0 0         return '' unless ( defined $pageref->{imageinfo} );
808              
809 0           my $url = @{ $pageref->{imageinfo} }[0]->{url};
  0            
810              
811 0 0         unless ( $url =~ /^https?\:\/\// ) {
812             return $self->_error(ERR_PARAMS,'The API returned a relative path. You need to configure the url where files are stored in {config}->{files_url}')
813 0 0         unless ( defined $self->{config}->{files_url} );
814 0           $url = $self->{config}->{files_url} . $url;
815             }
816              
817 0           my $response = $self->{ua}->get($url);
818 0 0         return $self->_error(ERR_DOWNLOAD,"The file '$url' was not found")
819             unless ( $response->code == 200 );
820              
821 0           return $response->decoded_content;
822             }
823              
824             # returns the version of mediawiki being run
825             sub _get_version {
826 0     0     my ($self) = @_;
827 0 0         return $self->{config}->{mw_ver} if exists( $self->{config}->{mw_ver} );
828 0 0         return undef unless my $ref = $self->api(
829             {
830             action => 'query',
831             meta => 'siteinfo'
832             } );
833 0           my $mwver = $ref->{query}->{general}->{generator};
834 0           $mwver =~ s/.+?(\d+\.\d+).*/$1/;
835 0           $self->{config}->{mw_ver} = $mwver;
836 0           return $mwver;
837             }
838              
839             # returns a copy of a hash (passed by reference) encoded to utf-8
840             # used to encode parameters before being passed to the api
841             sub _encode_hashref_utf8 {
842 0     0     my $uriver = $URI::VERSION;
843 0           my ($self, $ref, $skipenc) = @_;
844 0           for my $key ( keys %{$ref} ) {
  0            
845             # skip to next item if no value defined or the item is a ref (i.e. a file upload)
846 0 0 0       next if ! defined $ref->{$key} || ref($ref->{$key});
847             # if we don't want to skip encoding and the item doesn't already have the utf8 flag set or we are using
848             # an older version of URI.pm that doesn't handle the encoding correctly then we need to encode to utf8
849 0 0 0       if ( ! $skipenc && ( ! utf8::is_utf8($ref->{$key}) || $URI::VERSION < 1.36) ) {
      0        
850 0           $ref->{$key} = Encode::encode_utf8($ref->{$key});
851             }
852             # turn on the utf8 flag so the URI module knows what to do with it (and so we don't re-encode when we don't need to)
853             # if we are using a new enough version of URI that will handle the encoding correctly.
854             # so what you get is :
855             # URI < 1.36 - utf8 encoded string without utf8 flag (works)
856             # URI >= 1.36 - utf8 encoded string with utf8 flag (works)
857 0 0         Encode::_utf8_on($ref->{$key}) if $URI::VERSION >= 1.36;
858             }
859              
860 0           return $ref;
861             }
862              
863             # creates a querystring from a utf-8 hashref
864             sub _make_querystring {
865 0     0     my ($ref) = @_;
866 0           my @qs = ();
867 0           my $keyval;
868 0           for my $key ( keys %{$ref} ) {
  0            
869 0           $keyval = uri_escape_utf8($key) . '=' . uri_escape_utf8($ref->{$key});
870 0           push(@qs, $keyval);
871             }
872 0           return '?' . join('&',@qs);
873             }
874              
875             # gets a token from the API (https://www.mediawiki.org/wiki/API:Tokens)
876             sub _get_token {
877 0     0     my ($self, $type) = @_;
878              
879             # Check if we have this token cached
880 0           my $token = $self->{config}->{tokens}->{$type};
881 0 0         return $token if defined $token;
882              
883 0           my $query = { action => 'query', meta => 'tokens', type => $type };
884              
885 0 0         return undef unless ( my $ref = $self->api( $query ) );
886              
887 0           $token = $ref->{query}->{tokens}->{$type . 'token'};
888              
889 0 0         return $self->_error( ERR_EDIT, "Unable to get a $type token." ) unless ( defined $token );
890              
891             # cache the token
892 0           $self->{config}->{tokens}->{$type} = $token;
893              
894 0           return $token;
895             }
896              
897             sub _error {
898 0     0     my ($self, $code, $desc) = @_;
899 0           $self->{error}->{code} = $code;
900 0           $self->{error}->{details} = $desc;
901 0           $self->{error}->{stacktrace} = Carp::longmess($desc);
902              
903 0 0         $self->{config}->{on_error}->() if (defined $self->{config}->{on_error});
904              
905 0           return undef;
906             }
907              
908             __END__