line
stmt
bran
cond
sub
pod
time
code
1
package WWW::Mechanize::Firefox;
2
80
80
1129744
use 5.006; #weaken
80
307
80
2731
3
80
80
322
use strict;
80
103
80
2565
4
80
80
42159
use Time::HiRes qw(sleep); # hires sleep()
80
104677
80
287
5
6
80
80
49222
use URI ();
80
391638
80
1776
7
80
80
461
use File::Basename qw(dirname);
80
97
80
6244
8
80
80
41118
use HTTP::Response ();
80
1358412
80
2219
9
80
80
39374
use HTML::Selector::XPath 'selector_to_xpath';
80
162972
80
5418
10
80
80
35691
use MIME::Base64 'decode_base64';
80
41354
80
4280
11
80
80
36943
use WWW::Mechanize::Link;
80
23978
80
1842
12
80
80
32070
use Firefox::Application;
80
206
80
2315
13
80
80
411
use MozRepl::RemoteObject ();
80
98
80
929
14
80
80
284
use MozRepl::RemoteObject::Methods ();
80
104
80
995
15
80
80
31760
use HTTP::Cookies::MozRepl ();
80
198
80
1424
16
80
80
38470
use HTTP::Request::Common ();
80
221626
80
1837
17
80
80
437
use Scalar::Util qw'blessed weaken';
80
107
80
4518
18
80
80
339
use Encode qw(encode decode);
80
98
80
3409
19
80
80
376
use Carp qw(carp croak );
80
107
80
3071
20
21
80
80
313
use vars qw'$VERSION %link_spec @CARP_NOT';
80
96
80
54020
22
$VERSION = '0.78';
23
@CARP_NOT = ('MozRepl::RemoteObject',
24
'MozRepl::AnyEvent',
25
'MozRepl::RemoteObject::Instance'
26
); # we trust these blindly
27
28
=head1 NAME
29
30
WWW::Mechanize::Firefox - use Firefox as if it were WWW::Mechanize
31
32
=head1 SYNOPSIS
33
34
use WWW::Mechanize::Firefox;
35
my $mech = WWW::Mechanize::Firefox->new();
36
$mech->get('http://google.com');
37
38
$mech->eval_in_page('alert("Hello Firefox")');
39
my $png = $mech->content_as_png();
40
41
This module will let you automate Firefox through the
42
Mozrepl plugin. You need to have installed
43
that plugin in your Firefox.
44
45
For more examples see L.
46
47
=head1 CONSTRUCTOR and CONFIGURATION
48
49
=head2 C<< $mech->new( %args ) >>
50
51
use WWW::Mechanize::Firefox;
52
my $mech = WWW::Mechanize::Firefox->new();
53
54
Creates a new instance and connects it to Firefox.
55
56
Note that Firefox must have the C
57
extension installed and enabled.
58
59
The following options are recognized:
60
61
=over 4
62
63
=item *
64
65
C - regex for the title of the tab to reuse. If no matching tab is
66
found, the constructor dies.
67
68
If you pass in the string C, the currently
69
active tab will be used instead.
70
71
If you pass in a L instance, this will be used
72
as the new tab. This is convenient if you have an existing tab
73
in Firefox as object already, for example created through
74
LC<< ->addTab() >>.
75
76
=item *
77
78
C - will create a new tab if no existing tab matching
79
the criteria given in C can be found.
80
81
=item *
82
83
C - make the tab the active tab
84
85
=item *
86
87
C - name of the program to launch if we can't connect to it on
88
the first try.
89
90
=item *
91
92
C - an array reference of ids of subframes to include when
93
searching for elements on a page.
94
95
If you want to always search through all frames, just pass C<1>. This
96
is the default.
97
98
To prevent searching through frames, pass
99
100
frames => 0
101
102
To whitelist frames to be searched, pass the list
103
of frame selectors:
104
105
frames => ['#content_frame']
106
107
=item *
108
109
C - whether web failures converted are fatal Perl errors. See
110
the C accessor. True by default to make error checking easier.
111
112
To make errors non-fatal, pass
113
114
autodie => 0
115
116
in the constructor.
117
118
=item *
119
120
C - the name of the User Agent to use. This overrides
121
how Firefox identifies itself.
122
123
=item *
124
125
C - array reference to log levels, passed through to L
126
127
=item *
128
129
C - L buffer size, if the default of 1MB is not enough
130
131
=item *
132
133
C - the set of default Javascript events to listen for while
134
waiting for a reply. In fact, WWW::Mechanize::Firefox will almost always
135
wait until a 'DOMContentLoaded' or 'load' event. 'pagehide' events
136
will tell it for what frames to wait.
137
138
The default set is
139
140
'DOMContentLoaded','load',
141
'pageshow',
142
'pagehide',
143
'error','abort','stop',
144
145
=item *
146
147
C - a premade L
148
149
=item *
150
151
C - a premade L instance or a connection string
152
suitable for initializing one
153
154
=item *
155
156
C - whether to use the command queueing of L.
157
Default is 1.
158
159
=item *
160
161
C - whether to use native JSON encoder of Firefox
162
163
js_JSON => 'native', # force using the native JSON encoder
164
165
The default is to autodetect whether a native JSON encoder is available and
166
whether the transport is UTF-8 safe.
167
168
=item *
169
170
C - the events that are sent to an input field before its
171
value is changed. By default this is C<[focus]>.
172
173
=item *
174
175
C - the events that are sent to an input field after its
176
value is changed. By default this is C<[blur, change]>.
177
178
=back
179
180
=cut
181
182
sub new {
183
59
59
1
621
my ($class, %args) = @_;
184
185
59
50
228
if (! ref $args{ app }) {
186
59
211
my @passthrough = qw(launch repl bufsize log use_queue js_JSON);
187
59
100
132
my %options = map { exists $args{ $_ } ? ($_ => delete $args{ $_ }) : () }
354
582
188
@passthrough;
189
59
444
$args{ app } = Firefox::Application->new(
190
%options
191
);
192
};
193
194
0
0
if (my $tabname = delete $args{ tab }) {
195
0
0
if (! ref $tabname) {
0
196
0
0
if ($tabname eq 'current') {
197
0
$args{ tab } = $args{ app }->selectedTab();
198
} else {
199
0
croak "Don't know what to do with tab '$tabname'. Did you mean qr{$tabname}?";
200
};
201
} elsif ('MozRepl::RemoteObject::Instance' eq ref $tabname) {
202
# Nothing to do - we already got a tab passed in
203
# Just put it back in place
204
0
$args{ tab } = $tabname;
205
} else {
206
0
($args{ tab }) = grep { $_->{title} =~ /$tabname/ }
207
0
$args{ app }->openTabs();
208
0
0
if (! $args{ tab }) {
209
0
0
if (! delete $args{ create }) {
210
0
croak "Couldn't find a tab matching /$tabname/";
211
} else {
212
# fall through into tab creation
213
};
214
} else {
215
0
$args{ tab } = $args{ tab }->{tab};
216
};
217
};
218
};
219
0
0
if (! $args{ tab }) {
220
0
0
my @autoclose = exists $args{ autoclose } ? (autoclose => $args{ autoclose }) : ();
221
0
$args{ tab } = $args{ app }->addTab( @autoclose );
222
0
my $body = $args{ tab }->MozRepl::RemoteObject::Methods::dive(qw[ linkedBrowser contentWindow document body ]);
223
0
$body->{innerHTML} = __PACKAGE__;
224
};
225
226
0
0
if (delete $args{ autoclose }) {
227
0
$args{ app }->autoclose_tab($args{ tab });
228
};
229
0
0
if (! exists $args{ autodie }) { $args{ autodie } = 1 };
0
230
231
$args{ events } ||= [
232
0
0
'DOMContentLoaded','load',
233
'pageshow', # Navigation from cache will use "pageshow"
234
'pagehide',
235
'error','abort','stop',
236
];
237
0
0
$args{ on_event } ||= undef;
238
0
0
$args{ pre_value } ||= ['focus'];
239
0
0
$args{ post_value } ||= ['change','blur'];
240
0
0
if( ! exists $args{ frames }) {
241
0
0
$args{ frames } ||= 1; # we default to searching frames
242
};
243
244
0
0
die "No tab found"
245
unless $args{tab};
246
247
0
0
if (delete $args{ activate }) {
248
0
$args{ app }->activateTab( $args{ tab });
249
};
250
251
0
0
$args{ response } ||= undef;
252
0
0
$args{ current_form } ||= undef;
253
254
0
0
$args{ event_log } ||= [];
255
256
0
my $agent = delete $args{ agent };
257
258
0
my $self= bless \%args, $class;
259
260
0
$self->_initXpathResultTypes;
261
262
0
0
if( defined $agent ) {
263
0
$self->agent( $agent );
264
};
265
266
0
$self
267
};
268
269
sub DESTROY {
270
0
0
my ($self) = @_;
271
0
local $@;
272
0
0
if (my $app = delete $self->{ app }) {
273
0
%$self = (); # wipe out all references we keep
274
# but keep $app alive until we can dispose of it
275
# as the last thing, now:
276
0
$app = undef;
277
};
278
}
279
280
=head2 C<< $mech->agent( $product_id ); >>
281
282
$mech->agent('wonderbot/JS 1.0');
283
284
Set the product token that is used to identify the user agent on the network.
285
The agent value is sent as the "User-Agent" header in the requests. The default
286
is whatever Firefox uses.
287
288
To reset the user agent to the Firefox default, pass an empty string:
289
290
$mech->agent('');
291
292
=cut
293
294
sub agent {
295
0
0
1
my ($self,$name) = @_;
296
0
0
if( defined $name ) {
0
297
0
$self->add_header('User-Agent',$name);
298
} elsif( $name eq '' ) {
299
0
$self->delete_header('User-Agent');
300
};
301
};
302
303
=head2 C<< $mech->autodie( [$state] ) >>
304
305
$mech->autodie(0);
306
307
Accessor to get/set whether warnings become fatal.
308
309
=cut
310
311
0
0
0
1
sub autodie { $_[0]->{autodie} = $_[1] if @_ == 2; $_[0]->{autodie} }
0
312
313
=head2 C<< $mech->events() >>
314
315
$mech->events( ['load'] );
316
317
Sets or gets the set of Javascript events that WWW::Mechanize::Firefox
318
will wait for after requesting a new page. Returns an array reference.
319
320
Changing the set of events will most likely make WWW::Mechanize::Firefox
321
stall while waiting for a response.
322
323
This method is special to WWW::Mechanize::Firefox.
324
325
=cut
326
327
0
0
0
1
sub events { $_[0]->{events} = $_[1] if (@_ > 1); $_[0]->{events} };
0
328
329
=head2 C<< $mech->on_event() >>
330
331
$mech->on_event(1); # prints every page load event
332
333
# or give it a callback
334
$mech->on_event(sub { warn "Page loaded with $ev->{name} event" });
335
336
Gets/sets the notification handler for the Javascript event
337
that finished a page load. Set it to C<1> to output via C,
338
or a code reference to call it with the event.
339
340
This method is special to WWW::Mechanize::Firefox.
341
342
=cut
343
344
0
0
0
1
sub on_event { $_[0]->{on_event} = $_[1] if (@_ > 1); $_[0]->{on_event} };
0
345
346
=head2 C<< $mech->cookies() >>
347
348
my $cookie_jar = $mech->cookies();
349
350
Returns a L object that was initialized
351
from the live Firefox instance.
352
353
B C<< ->set_cookie >> is not yet implemented,
354
as is saving the cookie jar.
355
356
=cut
357
358
sub cookies {
359
0
0
1
return HTTP::Cookies::MozRepl->new(
360
repl => $_[0]->repl
361
)
362
}
363
364
=head1 JAVASCRIPT METHODS
365
366
=head2 C<< $mech->allow( %options ) >>
367
368
Enables or disables browser features for the current tab.
369
The following options are recognized:
370
371
=over 4
372
373
=item *
374
375
C - Whether to allow plugin execution.
376
377
=item *
378
379
C - Whether to allow Javascript execution.
380
381
=item *
382
383
C - Attribute stating if refresh based redirects can be allowed.
384
385
=item *
386
387
C, C - Attribute stating if it should allow subframes (framesets/iframes) or not.
388
389
=item *
390
391
C - Attribute stating whether or not images should be loaded.
392
393
=back
394
395
Options not listed remain unchanged.
396
397
=head3 Disable Javascript
398
399
$mech->allow( javascript => 0 );
400
401
=cut
402
403
80
80
1725
use vars '%known_options';
80
111
80
326177
404
%known_options = (
405
'javascript' => 'allowJavascript',
406
'plugins' => 'allowPlugins',
407
'metaredirects' => 'allowMetaRedirects',
408
'subframes' => 'allowSubframes',
409
'frames' => 'allowSubframes',
410
'images' => 'allowImages',
411
);
412
413
sub allow {
414
0
0
1
my ($self,%options) = @_;
415
0
my $shell = $self->docshell;
416
0
for my $opt (sort keys %options) {
417
0
0
if (my $opt_js = $known_options{ $opt }) {
418
0
$shell->{$opt_js} = $options{ $opt };
419
} else {
420
0
carp "Unknown option '$opt_js' (ignored)";
421
};
422
};
423
};
424
425
=head2 C<< $mech->js_errors() >>
426
427
print $_->{message}
428
for $mech->js_errors();
429
430
An interface to the Javascript Error Console
431
432
Returns the list of errors in the JEC
433
434
Maybe this should be called C or
435
C instead.
436
437
=cut
438
439
sub js_console {
440
0
0
0
my ($self) = @_;
441
0
my $getConsoleService = $self->repl->declare(<<'JS');
442
function() {
443
return Components.classes["@mozilla.org/consoleservice;1"]
444
.getService(Components.interfaces.nsIConsoleService);
445
}
446
JS
447
0
$getConsoleService->()
448
}
449
450
sub js_errors {
451
0
0
1
my ($self,$page) = @_;
452
0
my $console = $self->js_console;
453
0
my $getErrorMessages = $self->repl->declare(<<'JS', 'list');
454
function (consoleService) {
455
var out = {};
456
consoleService.getMessageArray(out, {});
457
return out.value || []
458
};
459
JS
460
0
$getErrorMessages->($console);
461
}
462
463
=head2 C<< $mech->clear_js_errors() >>
464
465
$mech->clear_js_errors();
466
467
Clears all Javascript messages from the console
468
469
=cut
470
471
sub clear_js_errors {
472
0
0
1
my ($self,$page) = @_;
473
0
$self->js_console->reset;
474
475
};
476
477
=head2 C<< $mech->eval_in_page( $str [, $env [, $document]] ) >>
478
479
=head2 C<< $mech->eval( $str [, $env [, $document]] ) >>
480
481
my ($value, $type) = $mech->eval( '2+2' );
482
483
Evaluates the given Javascript fragment in the
484
context of the web page.
485
Returns a pair of value and Javascript type.
486
487
This allows access to variables and functions declared
488
"globally" on the web page.
489
490
The returned result needs to be treated with
491
extreme care because
492
it might lead to Javascript execution in the context of
493
your application instead of the context of the webpage.
494
This should be evident for functions and complex data
495
structures like objects. When working with results from
496
untrusted sources, you can only safely use simple
497
types like C.
498
499
If you want to modify the environment the code is run under,
500
pass in a hash reference as the second parameter. All keys
501
will be inserted into the C object as well as
502
C. Also, complex data structures are only
503
supported if they contain no objects.
504
If you need finer control, you'll have to
505
write the Javascript yourself.
506
507
This method is special to WWW::Mechanize::Firefox.
508
509
Also, using this method opens a potential B as
510
the returned values can be objects and using these objects
511
can execute malicious code in the context of the Firefox application.
512
513
=cut
514
515
sub eval_in_page {
516
0
0
1
my ($self,$str,$env,$doc,$window) = @_;
517
0
0
$env ||= {};
518
0
my $js_env = {};
519
0
0
$doc ||= $self->document;
520
521
# do a manual transfer of keys, to circumvent our stupid
522
# transformation routine:
523
0
0
if (keys %$env) {
524
0
$js_env = $self->repl->declare(<<'JS')->();
525
function () { return new Object }
526
JS
527
0
for my $k (keys %$env) {
528
0
$js_env->{$k} = $env->{$k};
529
};
530
};
531
532
0
my $eval_in_sandbox = $self->repl->declare(<<'JS', 'list');
533
function (w,d,str,env,caller,line) {
534
var unsafeWin = w.wrappedJSObject;
535
var safeWin = XPCNativeWrapper(unsafeWin);
536
var sandbox = Components.utils.Sandbox(safeWin);
537
sandbox.window = safeWin;
538
sandbox.document = d;
539
// Transfer the environment
540
for (var e in env) {
541
sandbox[e] = env[e]
542
sandbox.window[e] = env[e]
543
}
544
sandbox.__proto__ = unsafeWin;
545
546
var res = Components.utils.evalInSandbox(str, sandbox, "1.8",caller,line);
547
return [res,typeof(res)];
548
};
549
JS
550
0
0
$window ||= $self->tab->{linkedBrowser}->{contentWindow};
551
# Report errors from scope of caller
552
# This feels weirdly backwards here, but oh well:
553
#local @CARP_NOT = (ref $self->repl); # we trust this
554
555
0
my ($caller,$line) = (caller)[1,2];
556
557
0
$eval_in_sandbox->($window,$doc,$str,$js_env,$caller,$line);
558
};
559
*eval = \&eval_in_page;
560
561
=head2 C<< $mech->unsafe_page_property_access( ELEMENT ) >>
562
563
Allows you unsafe access to properties of the current page. Using
564
such properties is an incredibly bad idea.
565
566
This is why the function Cs. If you really want to use
567
this function, edit the source code.
568
569
=cut
570
571
sub unsafe_page_property_access {
572
0
0
1
my ($mech,$element) = @_;
573
0
die;
574
0
my $window = $mech->tab->{linkedBrowser}->{contentWindow};
575
0
my $unsafe = $window->{wrappedJSObject};
576
0
$unsafe->{$element}
577
};
578
579
=head1 UI METHODS
580
581
See also L for how to add more than one tab
582
and how to manipulate windows and tabs.
583
584
=head2 C<< $mech->application() >>
585
586
my $ff = $mech->application();
587
588
Returns the L object for manipulating
589
more parts of the Firefox UI and application.
590
591
=cut
592
593
0
0
1
sub application { $_[0]->{app} };
594
595
=head2 C<< $mech->autoclose_tab >>
596
597
$mech->autoclose_tab( 0 ); # keep tab open after program end
598
599
Set whether to close the tab associated with the instance.
600
601
=cut
602
603
sub autoclose_tab {
604
0
0
1
my $self = shift;
605
0
$self->application->autoclose_tab($self->tab, @_);
606
};
607
608
=head2 C<< $mech->tab() >>
609
610
Gets the object that represents the Firefox tab used by WWW::Mechanize::Firefox.
611
612
This method is special to WWW::Mechanize::Firefox.
613
614
=cut
615
616
0
0
1
sub tab { $_[0]->{tab} };
617
618
=head2 C<< $mech->make_progress_listener( %callbacks ) >>
619
620
my $eventlistener = $mech->progress_listener(
621
onStateChange => \&onStateChange,
622
);
623
624
Creates an unconnected C<< nsIWebProgressListener >> interface
625
which calls the Perl subroutines you pass in.
626
627
Returns a handle. Once the handle gets released, all callbacks will
628
get stopped. Also, all Perl callbacks will get deregistered from the
629
Javascript bridge, so make sure not to use the same callback
630
in different progress listeners at the same time.
631
The sender may still call your callbacks.
632
633
=cut
634
635
sub make_progress_listener {
636
0
0
1
my ($mech,%handlers) = @_;
637
0
my $NOTIFY_STATE = $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
638
+ $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS')
639
;
640
0
my ($obj) = $mech->repl->expr('new Object');
641
0
for my $key (keys %handlers) {
642
0
$obj->{$key} = $handlers{$key};
643
};
644
#warn "Listener created";
645
646
0
my $mk_nsIWebProgressListener = $mech->repl->declare(<<'JS');
647
function (myListener) {
648
var callbacks = ["onStateChange",
649
"onLocationChange",
650
"onProgressChange",
651
"onStatusChange",
652
"onSecurityChange"
653
// ,"onProgressChange64"
654
// ,"onRefreshAttempted"
655
];
656
for (var h in callbacks) {
657
var e = callbacks[h];
658
if (! myListener[e]) {
659
myListener[e] = function(){}
660
} else {
661
// alert("Setting callback for " + e);
662
};
663
};
664
myListener.QueryInterface = function(aIID) {
665
if (aIID.equals(Components.interfaces.nsIWebProgressListener) ||
666
// aIID.equals(Components.interfaces.nsIWebProgressListener2) ||
667
aIID.equals(Components.interfaces.nsISupportsWeakReference) ||
668
aIID.equals(Components.interfaces.nsISupports))
669
return this;
670
throw Components.results.NS_NOINTERFACE;
671
};
672
return myListener
673
}
674
JS
675
676
# Declare it here so we don't close over $lsn!
677
my $release = sub {
678
0
0
0
$_[0]->bridge->remove_callback(values %handlers)
679
if $_[0]->bridge;
680
0
};
681
0
my $lsn = $mk_nsIWebProgressListener->($obj);
682
0
$lsn->__on_destroy($release);
683
0
$lsn
684
};
685
686
687
=head2 C<< $mech->progress_listener( $source, %callbacks ) >>
688
689
my $eventlistener = progress_listener(
690
$browser,
691
onLocationChange => \&onLocationChange,
692
);
693
694
Sets up the callbacks for the C<< nsIWebProgressListener >> interface
695
to be the Perl subroutines you pass in.
696
697
C< $source > needs to support C<.addProgressListener> and C<.removeProgressListener>.
698
699
Returns a handle. Once the handle gets released, all callbacks will
700
get stopped. Also, all Perl callbacks will get deregistered from the
701
Javascript bridge, so make sure not to use the same callback
702
in different progress listeners at the same time.
703
704
=cut
705
706
sub progress_listener {
707
0
0
1
my ($self,$source,%handlers) = @_;
708
709
0
my $lsn = $self->make_progress_listener(%handlers);
710
0
$lsn->{source} = $source;
711
712
0
$lsn->__release_action('if(self.source)try{self.source.removeProgressListener(self)}catch(e){}');
713
0
my $NOTIFY_STATE = $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
714
+ $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_LOCATION')
715
+ $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS');
716
0
$source->addProgressListener($lsn,$NOTIFY_STATE);
717
0
$lsn
718
};
719
720
=head2 C<< $mech->repl() >>
721
722
my ($value,$type) = $mech->repl->expr('2+2');
723
724
Gets the L instance that is used.
725
726
This method is special to WWW::Mechanize::Firefox.
727
728
=cut
729
730
0
0
1
sub repl { $_[0]->application->repl };
731
732
=head2 C<< $mech->highlight_node( @nodes ) >>
733
734
my @links = $mech->selector('a');
735
$mech->highlight_node(@links);
736
737
Convenience method that marks all nodes in the arguments
738
with
739
740
background: red;
741
border: solid black 1px;
742
display: block; /* if the element was display: none before */
743
744
This is convenient if you need visual verification that you've
745
got the right nodes.
746
747
There currently is no way to restore the nodes to their original
748
visual state except reloading the page.
749
750
=cut
751
752
sub highlight_node {
753
0
0
1
my ($self,@nodes) = @_;
754
0
for (@nodes) {
755
0
my $style = $_->{style};
756
0
0
$style->{display} = 'block'
757
if $style->{display} eq 'none';
758
0
$style->{background} = 'red';
759
0
$style->{border} = 'solid black 1px;';
760
};
761
};
762
763
=head1 NAVIGATION METHODS
764
765
=head2 C<< $mech->get( $url, %options ) >>
766
767
$mech->get( $url, ':content_file' => $tempfile );
768
769
Retrieves the URL C into the tab.
770
771
It returns a faked L object for interface compatibility
772
with L.
773
774
Recognized options:
775
776
=over 4
777
778
=item *
779
780
C<< :content_file >> - filename to store the data in
781
782
=item *
783
784
C<< no_cache >> - if true, bypass the browser cache
785
786
=item *
787
788
C<< synchronize >> - wait until all elements have loaded
789
790
The default is to wait until all elements have loaded. You can switch
791
this off by passing
792
793
synchronize => 0
794
795
for example if you want to manually poll for an element that appears fairly
796
early during the load of a complex page.
797
798
=back
799
800
=cut
801
802
sub get {
803
0
0
1
my ($self,$url, %options) = @_;
804
0
my $b = $self->tab->{linkedBrowser};
805
0
$self->clear_current_form;
806
807
0
my $flags = 0;
808
0
0
if ($options{ no_cache }) {
809
0
$flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
810
};
811
0
0
if (! exists $options{ synchronize }) {
812
0
$options{ synchronize } = $self->events;
813
};
814
0
0
if( !ref $options{ synchronize }) {
815
$options{ synchronize } = $options{ synchronize }
816
0
0
? $self->events
817
: []
818
};
819
820
$self->_sync_call( $options{ synchronize }, sub {
821
0
0
0
if (my $target = delete $options{":content_file"}) {
822
0
$self->save_url($url => ''.$target, %options);
823
} else {
824
0
$b->loadURIWithFlags(''.$url,$flags);
825
};
826
0
});
827
};
828
829
=head2 C<< $mech->get_local( $filename , %options ) >>
830
831
$mech->get_local('test.html');
832
833
Shorthand method to construct the appropriate
834
C<< file:// >> URI and load it into Firefox. Relative
835
paths will be interpreted as relative to C<$0>.
836
837
This method accepts the same options as C<< ->get() >>.
838
839
This method is special to WWW::Mechanize::Firefox but could
840
also exist in WWW::Mechanize through a plugin.
841
842
=cut
843
844
sub get_local {
845
0
0
1
my ($self, $htmlfile, %options) = @_;
846
0
require Cwd;
847
0
require File::Spec;
848
0
my $fn = File::Spec->rel2abs(
849
File::Spec->catfile(dirname($0),$htmlfile),
850
Cwd::getcwd(),
851
);
852
0
$fn =~ s!\\!/!g; # fakey "make file:// URL"
853
854
0
$self->get("file://$fn", %options);
855
}
856
857
=head2 C<< $mech->post( $url, %options ) >>
858
859
$mech->post( 'http://example.com',
860
params => { param => "Hello World" },
861
headers => {
862
"Content-Type" => 'application/x-www-form-urlencoded',
863
},
864
charset => 'utf-8',
865
);
866
867
Sends a POST request to C<$url>.
868
869
A C header will be automatically calculated if
870
it is not given.
871
872
The following options are recognized:
873
874
=over 4
875
876
=item *
877
878
C - a hash of HTTP headers to send. If not given,
879
the content type will be generated automatically.
880
881
=item *
882
883
C - the raw data to send, if you've encoded it already.
884
885
=back
886
887
=cut
888
889
sub post {
890
0
0
1
my ($self, $url, %options) = @_;
891
0
my $b = $self->tab->{linkedBrowser};
892
0
$self->clear_current_form;
893
894
0
my $flags = 0;
895
0
0
if ($options{no_cache}) {
896
0
$flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
897
};
898
0
0
if (! exists $options{synchronize}) {
899
0
$options{synchronize} = $self->events;
900
};
901
0
0
if( !ref $options{synchronize}) {
902
0
0
$options{synchronize} = $options{synchronize}
903
? $self->events
904
: []
905
};
906
907
# If we don't have data, encode the parameters:
908
0
0
if( !$options{ data }) {
909
0
my $req= HTTP::Request::Common::POST( $url, $options{params} );
910
0
warn $req->content;
911
0
$options{ data } = $req->content;
912
};
913
914
0
0
$options{ charset } ||= 'utf-8';
915
0
0
$options{ headers } ||= {};
916
0
0
$options{ headers }->{"Content-Type"} ||= "application/x-www-form-urlencoded";
917
0
0
if( $options{ charset }) {
918
0
$options{ headers }->{"Content-Type"} .= "; charset=$options{ charset }";
919
};
920
921
0
my $streamPostData = $self->repl->declare(<<'JS');
922
function(headers, dataString) {
923
// POST method requests must wrap the encoded text in a MIME stream
924
const Cc = Components.classes;
925
const Ci = Components.interfaces;
926
var stringStream = Cc["@mozilla.org/io/string-input-stream;1"].
927
createInstance(Ci.nsIStringInputStream);
928
if ("data" in stringStream) // Gecko 1.9 or newer
929
stringStream.data = dataString;
930
else // 1.8 or older
931
stringStream.setData(dataString, dataString.length);
932
933
var postData = Cc["@mozilla.org/network/mime-input-stream;1"].
934
createInstance(Ci.nsIMIMEInputStream);
935
for( h in headers ) {
936
postData.addHeader( h, headers[h] );
937
};
938
postData.addContentLength = true;
939
postData.setData(stringStream);
940
941
return postData;
942
}
943
JS
944
945
$self->_sync_call($options{synchronize}, sub {
946
0
0
my $postData = $streamPostData->($options{headers}, $options{data});
947
0
$b->loadURIWithFlags(''.$url, $flags, undef, $options{charset}, $postData);
948
0
});
949
}
950
951
=head2 C<< $mech->add_header( $name => $value, ... ) >>
952
953
$mech->add_header(
954
'X-WWW-Mechanize-Firefox' => "I'm using it",
955
Encoding => 'text/klingon',
956
);
957
958
This method sets up custom headers that will be sent with B HTTP(S)
959
request that Firefox makes.
960
961
Using multiple instances of WWW::Mechanize::Firefox objects with the same
962
application together with changed request headers will most likely have weird
963
effects. So don't do that.
964
965
Note that currently, we only support one value per header.
966
967
Some versions of Firefox don't work with the method that is used to set
968
the custom headers. Please see C for the exact
969
versions where the implemented mechanism doesn't work. Roughly, this is
970
for versions 17 to 24 of Firefox.
971
972
=cut
973
974
# This subroutine creates the custom header observer. It has a hashref
975
# of headers that it will add to EACH request that Firefox sends out.
976
# It removes itself when the Perl object gets destroyed.
977
sub _custom_header_observer {
978
0
0
my ($self, @headers) = @_;
979
980
# This routine was taken from http://d.hatena.ne.jp/oppara/20090410/p1
981
0
my $on_modify_request = $self->repl->declare(<<'JS');
982
function() { // headers passed via arguments
983
const Cc= Components.classes;
984
const Ci= Components.interfaces;
985
const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
986
var h= [].slice.call(arguments);
987
var hr= {};
988
for( var i=0; i
989
var k= h[i];
990
var v= h[i+1];
991
hr[k]= v;
992
};
993
994
var myObserver= {
995
headers: hr,
996
observe: function(subject,topic,data) {
997
if(topic != 'http-on-modify-request') return;
998
999
var http = subject.QueryInterface(Ci.nsIHttpChannel);
1000
for( var k in this.headers) {
1001
var v= this.headers[k];
1002
http.setRequestHeader(k,v, false);
1003
1004
if (k== 'Referer' && http.referrer) {
1005
http.referrer.spec = v;
1006
};
1007
};
1008
}
1009
}
1010
observerService.addObserver(myObserver,'http-on-modify-request',false);
1011
return myObserver;
1012
};
1013
JS
1014
0
my $obs = $on_modify_request->(@headers);
1015
1016
# Clean up after ourselves
1017
0
$obs->__release_action(<<'JS');
1018
const Cc= Components.classes;
1019
const Ci= Components.interfaces;
1020
const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
1021
try {
1022
observerService.removeObserver(self,'http-on-modify-request',false);
1023
} catch (e) {}
1024
JS
1025
0
return $obs;
1026
};
1027
1028
sub add_header {
1029
0
0
1
my ($self, @headers) = @_;
1030
0
0
$self->{custom_header_observer} ||= $self->_custom_header_observer;
1031
1032
# This is slooow, but we only do it when changing the headers...
1033
0
my $h = $self->{custom_header_observer}->{headers};
1034
0
while( my ($k,$v) = splice @headers, 0, 2 ) {
1035
0
$h->{$k} = $v;
1036
};
1037
};
1038
1039
=head2 C<< $mech->delete_header( $name , $name2... ) >>
1040
1041
$mech->delete_header( 'User-Agent' );
1042
1043
Removes HTTP headers from the agent's list of special headers. Note
1044
that Firefox may still send a header with its default value.
1045
1046
=cut
1047
1048
sub delete_header {
1049
0
0
1
my ($self, @headers) = @_;
1050
1051
0
0
0
if( $self->{custom_header_observer} and @headers ) {
1052
# This is slooow, but we only do it when changing the headers...
1053
0
my $h = $self->{custom_header_observer}->{headers};
1054
1055
delete $h->{$_}
1056
0
for( @headers );
1057
};
1058
};
1059
1060
=head2 C<< $mech->reset_headers >>
1061
1062
$mech->reset_headers();
1063
1064
Removes all custom headers and makes Firefox send its defaults again.
1065
1066
=cut
1067
1068
sub reset_headers {
1069
0
0
1
my ($self) = @_;
1070
0
delete $self->{custom_header_observer};
1071
};
1072
1073
sub _addLoadEventListener {
1074
0
0
my ($self,%options) = @_;
1075
1076
0
0
$options{ tab } ||= $self->tab;
1077
0
0
$options{ window } ||= $self->application->getMostRecentWindow;
1078
0
0
$options{ events } ||= $self->events;
1079
0
my $add_load_listener = $self->repl->declare(<<'JS');
1080
function( mainWindow, tab, waitForLoad, events ) {
1081
var browser= mainWindow.gBrowser.getBrowserForTab( tab );
1082
1083
var lock= {
1084
"busy": 1,
1085
"log":[],
1086
"events": events,
1087
"browser": browser,
1088
"cb": undefined,
1089
"release": function() {
1090
for(var i=0; i
1091
this.browser.removeEventListener(this.events[i], this.cb, true);
1092
};
1093
}
1094
};
1095
var unloadedFrames= [];
1096
1097
lock.cb= function (e) {
1098
var t= e.target;
1099
var toplevel= (t == browser.contentDocument);
1100
lock.log.push("Event "+e.type);
1101
var reloadedFrame= false;
1102
lock.log.push( "" + unloadedFrames.length + " frames.");
1103
1104
if( "FRAME" == t.tagName
1105
|| "IFRAME" == t.tagName ) {
1106
loc= t.src;
1107
} else if( !t.tagName ) {
1108
// Document
1109
loc= t.URL;
1110
} else { // ignore
1111
lock.log.push("Ignoring " + e.type + " on " + t.tagName);
1112
};
1113
try {
1114
if( t instanceof HTMLDocument ) {
1115
// We are only interested in HTML pages here
1116
var container= t.defaultView.frameElement || browser.contentWindow;
1117
for( var i=0; i < unloadedFrames.length; i++ ) {
1118
try {
1119
// lock.log.push( "" + i + " " + unloadedFrames[i].id + " - " + unloadedFrames[i].src );
1120
reloadedFrame= reloadedFrame
1121
|| unloadedFrames[i] === container;
1122
} catch (e) {
1123
// alert("Some frame element has gone away already...");
1124
};
1125
// alert("Caught " + e.type + " on remembered element. Great - " + reloadedFrame);
1126
};
1127
1128
if ("pagehide" == e.type && container ) {
1129
// alert("pagehide on container /lock"+lock.id);
1130
// A frame or window gets reloaded.
1131
// A frame gets reloaded. We remember it so we can
1132
// tell when it has completed. We won't get a separate
1133
// completion event on the parent document :-(
1134
lock.log.push("Remembering frame parent, for 'load' event");
1135
unloadedFrames.push( container );
1136
// Maybe we should just attach all events here?!
1137
};
1138
};
1139
} catch (e) { alert("Error while looking: " + e.message+" " + e.line) };
1140
1141
// if (! toplevel && !reloadedFrame ) { return ; };
1142
lock.log.push("<> " + e.type + " on " + loc);
1143
1144
if( (reloadedFrame)
1145
// && !waitForLoad
1146
&& "DOMContentLoaded" == e.type
1147
) {
1148
// We loaded a document
1149
// See if it contains (i)frames
1150
// and wait for "load" to fire if so
1151
// alert("Reloaded a container /lock:" + lock.id);
1152
lock.log.push("DOMContentLoaded for toplevel");
1153
var q= "//IFRAME|//FRAME";
1154
var frames= t.evaluate(q,t,null,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ).snapshotLength;
1155
lock.log.push("Found " + frames + " frames");
1156
if( frames ) {
1157
lock.log.push("Waiting for 'load' because we found frames");
1158
waitForLoad= true;
1159
} else if( /^about:neterror\?/.test( loc ) || !waitForLoad ) {
1160
lock.log.push("Early out on DOMContentLoaded");
1161
lock.busy= 0;
1162
};
1163
1164
} else if( (reloadedFrame)
1165
&& ( "load" == e.type
1166
|| "pageshow" == e.type
1167
)) { // We always are done on "load" on toplevel
1168
lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1169
lock.busy= 0;
1170
1171
} else if( (toplevel || reloadedFrame)
1172
&& ("error" == e.type || "stop" == e.type)) { // We always are done on "load" on toplevel
1173
lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1174
lock.busy= 0;
1175
};
1176
1177
};
1178
1179
for(var i=0; i
1180
browser.addEventListener(events[i], lock.cb, true);
1181
};
1182
lock.log.push("Listening");
1183
1184
return lock
1185
}
1186
JS
1187
0
return $add_load_listener->($options{ window }, $options{ tab }, 1, $options{ events } );
1188
}
1189
1190
sub _addEventListener {
1191
0
0
my ($self,@args) = @_;
1192
0
0
0
if (@args <= 2 and ref($args[0]) eq 'MozRepl::RemoteObject::Instance') {
1193
0
@args = [@args];
1194
};
1195
0
for (@args) {
1196
0
0
$_->[1] ||= $self->events;
1197
0
0
$_->[1] = [$_->[1]]
1198
unless ref $_->[1];
1199
};
1200
# Now, flatten the arg list again...
1201
0
@args = map { @$_ } @args;
0
1202
1203
# This registers multiple events for a one-shot event
1204
0
my $make_semaphore = $self->repl->declare(<<'JS');
1205
function() {
1206
var lock = { "busy": 0, "event" : null };
1207
var listeners = [];
1208
var pairs = arguments;
1209
for( var k = 0; k < pairs.length ; k++) {
1210
var b = pairs[k];
1211
k++;
1212
var events = pairs[k];
1213
1214
for( var i = 0; i < events.length; i++) {
1215
var evname = events[i];
1216
var callback = (function(listeners,evname){
1217
return function(e) {
1218
if (! lock.busy) {
1219
lock.busy++;
1220
lock.event = e.type;
1221
lock.js_event = {};
1222
lock.js_event.target = e.originalTarget;
1223
lock.js_event.type = e.type;
1224
//alert("Caught first event " + e.type + " " + e.message);
1225
} else {
1226
//alert("Caught duplicate event " + e.type + " " + e.message);
1227
};
1228
for( var j = 0; j < listeners.length; j++) {
1229
listeners[j][0].removeEventListener(listeners[j][1],listeners[j][2],true);
1230
};
1231
};
1232
})(listeners,evname);
1233
listeners.push([b,evname,callback]);
1234
b.addEventListener(evname,callback,true);
1235
};
1236
};
1237
return lock
1238
}
1239
JS
1240
# $browser,$events
1241
0
return $make_semaphore->(@args);
1242
};
1243
1244
sub _wait_while_busy {
1245
0
0
my ($self,@elements) = @_;
1246
# Now do the busy-wait
1247
# Should this also include a ->poll()
1248
# and a callback?
1249
1250
0
while (1) {
1251
0
for my $element (@elements) {
1252
0
0
0
if ((my $s = $element->{busy} || 0) < 1) {
1253
0
for my $element (@elements) {
1254
0
push @{ $self->{event_log} },
0
1255
0
join "\n", @{ $element->{log}};
1256
};
1257
0
return $element;
1258
};
1259
};
1260
0
sleep 0.1;
1261
1262
# if (time-$timer > 4) {
1263
# $timer= time;
1264
# for my $element (@elements) {
1265
# for (@{ $element->{log}}) {
1266
# print $_,"\n";
1267
# };
1268
# print "---\n";
1269
# };
1270
# };
1271
};
1272
}
1273
1274
=head2 C<< $mech->synchronize( $event, $callback ) >>
1275
1276
Wraps a synchronization semaphore around the callback
1277
and waits until the event C<$event> fires on the browser.
1278
If you want to wait for one of multiple events to occur,
1279
pass an array reference as the first parameter.
1280
1281
Usually, you want to use it like this:
1282
1283
my $l = $mech->xpath('//a[@onclick]', single => 1);
1284
$mech->synchronize('DOMFrameContentLoaded', sub {
1285
$mech->click( $l );
1286
});
1287
1288
It is necessary to synchronize with the browser whenever
1289
a click performs an action that takes longer and
1290
fires an event on the browser object.
1291
1292
The C event is fired by Firefox when
1293
the whole DOM and all C
1294
If your document doesn't have frames, use the C
1295
event instead.
1296
1297
If you leave out C<$event>, the value of C<< ->events() >> will
1298
be used instead.
1299
1300
=cut
1301
1302
sub _install_response_header_listener {
1303
0
0
my ($self) = @_;
1304
1305
0
weaken $self;
1306
1307
# Pre-Filter the progress on the JS side of things so we
1308
# don't get that much traffic back and forth between Perl and JS
1309
0
my $make_state_change_filter = $self->repl->declare(<<'JS');
1310
function (cb,console) {
1311
const STATE_START = Components.interfaces.nsIWebProgressListener.STATE_START;
1312
const STATE_STOP = Components.interfaces.nsIWebProgressListener.STATE_STOP;
1313
const STATE_TRANSFERRING = Components.interfaces.nsIWebProgressListener.STATE_TRANSFERRING;
1314
const STATE_IS_DOCUMENT = Components.interfaces.nsIWebProgressListener.STATE_IS_DOCUMENT;
1315
const STATE_IS_WINDOW = Components.interfaces.nsIWebProgressListener.STATE_IS_WINDOW;
1316
1317
return function (progress,request,flags,status) {
1318
if( 0 && console ) {
1319
const nsIChannel = Components.interfaces.nsIChannel;
1320
var ch = request.QueryInterface(nsIChannel);
1321
1322
console.log("STATE: "
1323
+ (flags & STATE_START ? "s" : "-")
1324
+ (flags & STATE_STOP ? "S" : "-")
1325
+ (flags & STATE_TRANSFERRING ? "T" : "-")
1326
+ (flags & STATE_IS_DOCUMENT ? "D" : "-")
1327
+ (flags & STATE_IS_WINDOW ? "W" : "-")
1328
+ " " + status
1329
+ " " + ch.originalURI.spec
1330
+ " -> " + ch.URI.spec
1331
);
1332
};
1333
// if (flags & (STATE_STOP|STATE_IS_WINDOW) == (STATE_STOP|STATE_IS_WINDOW)) {
1334
if (flags & (STATE_STOP|STATE_IS_DOCUMENT) == (STATE_STOP|STATE_IS_DOCUMENT)) {
1335
cb(progress,request,flags,status);
1336
} else if ((flags & STATE_STOP) == STATE_STOP) {
1337
cb(progress,request,flags,status);
1338
}
1339
}
1340
}
1341
JS
1342
1343
# These should be cached and optimized into one hash query
1344
0
my $STATE_STOP = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_STOP');
1345
0
my $STATE_IS_DOCUMENT = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_IS_DOCUMENT');
1346
0
my $STATE_IS_WINDOW = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_IS_WINDOW');
1347
1348
my $state_change = $make_state_change_filter->(sub {
1349
0
0
my ($progress,$request,$flags,$status) = @_;
1350
#warn sprintf "State : %032b %08x\n", $flags, $status;
1351
#warn sprintf " %032b\n", $STATE_STOP | $STATE_IS_DOCUMENT | $STATE_IS_WINDOW ;
1352
1353
0
0
0
if ( $STATE_STOP == $flags # some error
1354
or ($flags & ($STATE_STOP | $STATE_IS_DOCUMENT)) == ($STATE_STOP | $STATE_IS_DOCUMENT)) {
1355
0
0
if ($status == 0 ) {
1356
#warn "Storing request to response";
1357
#warn "URI ".$request->{URI}->{asciiSpec};
1358
0
0
$self->{ response } ||= $request;
1359
} else {
1360
#warn "Erasing response";
1361
0
undef $self->{ response };
1362
};
1363
};
1364
#}, $self->tab->{linkedBrowser}->{contentWindow}->{console}, $lock);
1365
0
}, $self->tab->{linkedBrowser}->{contentWindow}->{console});
1366
1367
0
my $browser = $self->tab->{linkedBrowser};
1368
1369
# These should mimick the LWP::UserAgent events maybe?
1370
0
return $self->progress_listener(
1371
$browser,
1372
onStateChange => $state_change,
1373
#onProgressChange => sub { print "Progress : @_\n" },
1374
#onLocationChange => sub { printf "Location : %s\n", $_[2]->{spec} },
1375
#onStatusChange => sub { print "Status : @_\n"; },
1376
);
1377
};
1378
1379
sub synchronize {
1380
0
0
1
my ($self,$events,$callback) = @_;
1381
0
0
0
if (ref $events and ref $events eq 'CODE') {
1382
0
$callback = $events;
1383
0
$events = $self->events;
1384
};
1385
1386
0
0
$events = [ $events ]
1387
unless ref $events;
1388
1389
0
undef $self->{response};
1390
1391
0
my $need_response = defined wantarray;
1392
0
my $response_catcher = $self->_install_response_header_listener();
1393
1394
0
my $load_lock = $self->_addLoadEventListener( tab => $self->tab, events => $events );
1395
0
$callback->();
1396
1397
0
my $ev = $self->_wait_while_busy($load_lock);
1398
0
0
if (my $h = $self->{on_event}) {
1399
0
0
if (ref $h eq 'CODE') {
1400
0
$h->($ev)
1401
} else {
1402
0
warn "Received $ev->{event}";
1403
#warn "$ev->{event}->{text}"";
1404
};
1405
};
1406
1407
# Clean up our event listener
1408
0
$load_lock->release;
1409
1410
0
undef $response_catcher;
1411
# Response catcher gets released here
1412
1413
0
$self->signal_http_status;
1414
0
0
if ($need_response) {
1415
0
return $self->response
1416
};
1417
};
1418
1419
=head2 C<< $mech->res() >> / C<< $mech->response(%options) >>
1420
1421
my $response = $mech->response(headers => 0);
1422
1423
Returns the current response as a L object.
1424
1425
The C option tells the module whether to fetch the headers
1426
from Firefox or not. This is mainly an internal optimization hack.
1427
1428
=cut
1429
1430
sub _headerVisitor {
1431
0
0
my ($self,$cb) = @_;
1432
0
my $obj = $self->repl->expr('new Object');
1433
0
$obj->{visitHeader} = $cb;
1434
0
$obj
1435
};
1436
1437
sub _extract_response {
1438
0
0
my ($self,$request,%options) = @_;
1439
1440
0
my $nsIHttpChannel = $self->repl->constant('Components.interfaces.nsIHttpChannel');
1441
0
my $httpChannel = $request->QueryInterface($nsIHttpChannel);
1442
1443
0
my @headers;
1444
0
0
if( $options{ headers }) {
1445
0
0
my $v = $self->_headerVisitor(sub{push @headers, @_});
0
1446
1447
# If this fails, we're calling it too early :-(
1448
0
$httpChannel->visitResponseHeaders($v);
1449
};
1450
1451
0
my $res = HTTP::Response->new(
1452
$httpChannel->{responseStatus},
1453
$httpChannel->{responseStatusText},
1454
\@headers,
1455
undef, # no body so far
1456
);
1457
0
return $res;
1458
};
1459
1460
sub response {
1461
0
0
1
my ($self, %options) = @_;
1462
1463
0
0
if( ! exists $options{ headers }) {
1464
0
$options{ headers } = 1;
1465
};
1466
1467
# If we still have a valid JS response,
1468
# create a HTTP::Response from that
1469
0
0
if (my $js_res = $self->{ response }) {
1470
#my $ouri = $js_res->{originalURI};
1471
0
my $ouri = $js_res->{URI};
1472
0
my $scheme = '';
1473
#warn "Reading response for ".$js_res->{URI}->{asciiSpec};
1474
#warn " original ".$js_res->{originalURI}->{asciiSpec};
1475
0
0
if ($ouri) {
1476
0
$scheme = $ouri->{scheme};
1477
};
1478
1479
0
0
0
if ($scheme and $scheme =~ /^https?/) {
0
0
1480
# We can only extract from a HTTP Response
1481
0
return $self->_extract_response( $js_res, %options );
1482
} elsif ($scheme and $scheme =~ /^(file|data|about)\b/) {
1483
# We're cool!
1484
0
return HTTP::Response->new( 200, '', ['Content-Encoding','UTF-8'], encode 'UTF-8' => $self->content);
1485
} else {
1486
# We'll make up a response, below
1487
#my $url = $self->document->{documentURI};
1488
#carp "Making up a response for unknown URL scheme '$scheme' (from '$url')";
1489
};
1490
};
1491
1492
# Otherwise, make up a reason:
1493
0
my $eff_url = $self->document->{documentURI};
1494
#warn $eff_url;
1495
0
0
if ($eff_url =~ /^about:neterror/) {
1496
# this is an error
1497
0
return HTTP::Response->new(500)
1498
};
1499
1500
# We're cool, except we don't know what we're doing here:
1501
0
return HTTP::Response->new( 200, '', ['Content-Encoding','UTF-8'], encode 'UTF-8' => $self->content);
1502
}
1503
*res = \&response;
1504
1505
=head2 C<< $mech->success() >>
1506
1507
$mech->get('http://google.com');
1508
print "Yay"
1509
if $mech->success();
1510
1511
Returns a boolean telling whether the last request was successful.
1512
If there hasn't been an operation yet, returns false.
1513
1514
This is a convenience function that wraps C<< $mech->res->is_success >>.
1515
1516
=cut
1517
1518
sub success {
1519
0
0
1
my $res = $_[0]->response( headers => 0 );
1520
0
0
$res and $res->is_success
1521
}
1522
1523
=head2 C<< $mech->status() >>
1524
1525
$mech->get('http://google.com');
1526
print $mech->status();
1527
# 200
1528
1529
Returns the HTTP status code of the response.
1530
This is a 3-digit number like 200 for OK, 404 for not found, and so on.
1531
1532
=cut
1533
1534
sub status {
1535
0
0
1
my ($self) = @_;
1536
0
return $self->response( headers => 0 )->code
1537
};
1538
1539
=head2 C<< $mech->reload( [$bypass_cache] ) >>
1540
1541
$mech->reload();
1542
1543
Reloads the current page. If C<$bypass_cache>
1544
is a true value, the browser is not allowed to
1545
use a cached page. This is the difference between
1546
pressing C (cached) and C (uncached).
1547
1548
Returns the (new) response.
1549
1550
=cut
1551
1552
sub reload {
1553
0
0
1
my ($self, $bypass_cache) = @_;
1554
0
0
$bypass_cache ||= 0;
1555
0
0
if ($bypass_cache) {
1556
0
$bypass_cache = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
1557
};
1558
$self->synchronize( sub {
1559
0
0
$self->tab->{linkedBrowser}->reloadWithFlags($bypass_cache);
1560
0
});
1561
}
1562
1563
# Internal convenience method for dipatching a call either synchronized
1564
# or not
1565
sub _sync_call {
1566
0
0
my ($self, $events, $cb) = @_;
1567
1568
0
0
if (@$events) {
1569
0
$self->synchronize( $events, $cb );
1570
} else {
1571
0
$cb->();
1572
};
1573
};
1574
1575
=head2 C<< $mech->back( [$synchronize] ) >>
1576
1577
$mech->back();
1578
1579
Goes one page back in the page history.
1580
1581
Returns the (new) response.
1582
1583
=cut
1584
1585
sub back {
1586
0
0
1
my ($self, $synchronize) = @_;
1587
0
0
$synchronize ||= (@_ != 2);
1588
0
0
if( !ref $synchronize ) {
1589
0
0
$synchronize = $synchronize
1590
? $self->events
1591
: []
1592
};
1593
1594
$self->_sync_call($synchronize, sub {
1595
0
0
$self->tab->{linkedBrowser}->goBack;
1596
0
});
1597
}
1598
1599
=head2 C<< $mech->forward( [$synchronize] ) >>
1600
1601
$mech->forward();
1602
1603
Goes one page forward in the page history.
1604
1605
Returns the (new) response.
1606
1607
=cut
1608
1609
sub forward {
1610
0
0
1
my ($self, $synchronize) = @_;
1611
0
0
$synchronize ||= (@_ != 2);
1612
0
0
if( !ref $synchronize ) {
1613
0
0
$synchronize = $synchronize
1614
? $self->events
1615
: []
1616
};
1617
1618
$self->_sync_call($synchronize, sub {
1619
0
0
$self->tab->{linkedBrowser}->goForward;
1620
0
});
1621
}
1622
1623
=head2 C<< $mech->uri() >>
1624
1625
print "We are at " . $mech->uri;
1626
1627
Returns the current document URI.
1628
1629
=cut
1630
1631
sub uri {
1632
0
0
1
my ($self) = @_;
1633
0
my $loc = $self->tab->MozRepl::RemoteObject::Methods::dive(qw[
1634
linkedBrowser
1635
currentURI
1636
asciiSpec ]);
1637
0
return URI->new( $loc );
1638
};
1639
1640
=head1 CONTENT METHODS
1641
1642
=head2 C<< $mech->document() >>
1643
1644
Returns the DOM document object.
1645
1646
This is WWW::Mechanize::Firefox specific.
1647
1648
=cut
1649
1650
sub document {
1651
0
0
1
my ($self) = @_;
1652
#$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser contentWindow document]);
1653
0
$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser contentDocument]);
1654
}
1655
1656
=head2 C<< $mech->docshell() >>
1657
1658
my $ds = $mech->docshell;
1659
1660
Returns the C Javascript object associated with the tab.
1661
1662
This is WWW::Mechanize::Firefox specific.
1663
1664
=cut
1665
1666
sub docshell {
1667
0
0
1
my ($self) = @_;
1668
0
$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser docShell]);
1669
}
1670
1671
=head2 C<< $mech->content( %options ) >>
1672
1673
print $mech->content;
1674
print $mech->content( format => 'html' ); # default
1675
print $mech->content( format => 'text' ); # identical to ->text
1676
1677
This always returns the content as a Unicode string. It tries
1678
to decode the raw content according to its input encoding.
1679
This currently only works for HTML pages, not for images etc.
1680
1681
Recognized options:
1682
1683
=over 4
1684
1685
=item *
1686
1687
C - the document to use.
1688
1689
Default is C<< $self->document >>.
1690
1691
=item *
1692
1693
C - the stuff to return
1694
1695
The allowed values are C and C. The default is C.
1696
1697
=back
1698
1699
=cut
1700
1701
sub content {
1702
0
0
1
my ($self, %options) = @_;
1703
0
0
$options{ format } ||= 'html';
1704
1705
0
0
my $d = delete $options{ document } || $self->document; # keep a reference to it!
1706
0
0
my $format = delete $options{ format } || 'html';
1707
0
my $content;
1708
1709
0
0
if( $format eq 'html' ) {
0
1710
0
my $html = $self->repl->declare(<<'JS', 'list');
1711
function(d){
1712
var e = d.createElement("div");
1713
e.appendChild(d.documentElement.cloneNode(true));
1714
return [e.innerHTML,d.inputEncoding];
1715
}
1716
JS
1717
# We return the raw bytes here.
1718
0
($content,my $encoding) = $html->($d);
1719
0
0
if (! utf8::is_utf8($content)) {
1720
#warn "Switching on UTF-8 (from $encoding)";
1721
# Switch on UTF-8 flag
1722
# This should never happen, as JSON::XS (and JSON) should always
1723
# already return proper UTF-8
1724
# But it does happen.
1725
0
$content = Encode::decode($encoding, $content);
1726
};
1727
} elsif ( $format eq 'text' ) {
1728
0
$content = $self->text;
1729
}
1730
else {
1731
0
$self->die( qq{Unknown "format" parameter "$format"} );
1732
}
1733
1734
0
return $content
1735
};
1736
1737
=head2 C<< $mech->text() >>
1738
1739
Returns the text of the current HTML content. If the content isn't
1740
HTML, $mech will die.
1741
1742
=cut
1743
1744
sub text {
1745
0
0
1
my $self = shift;
1746
1747
# Waugh - this is highly inefficient but conveniently short to write
1748
# Maybe this should skip SCRIPT nodes...
1749
0
join '', map { $_->{nodeValue} } $self->xpath('//*/text()');
0
1750
}
1751
1752
1753
=head2 C<< $mech->content_encoding() >>
1754
1755
print "The content is encoded as ", $mech->content_encoding;
1756
1757
Returns the encoding that the content is in. This can be used
1758
to convert the content from UTF-8 back to its native encoding.
1759
1760
=cut
1761
1762
sub content_encoding {
1763
0
0
1
my ($self, $d) = @_;
1764
0
0
$d ||= $self->document; # keep a reference to it!
1765
0
return $d->{inputEncoding};
1766
};
1767
1768
=head2 C<< $mech->update_html( $html ) >>
1769
1770
$mech->update_html($html);
1771
1772
Writes C<$html> into the current document. This is mostly
1773
implemented as a convenience method for L.
1774
1775
=cut
1776
1777
sub update_html {
1778
0
0
1
my ($self,$content) = @_;
1779
0
my $url = URI->new('data:');
1780
0
$url->media_type("text/html");
1781
0
$url->data($content);
1782
$self->synchronize($self->events, sub {
1783
0
0
$self->tab->{linkedBrowser}->loadURI("$url");
1784
0
});
1785
return
1786
0
};
1787
1788
=head2 C<< $mech->save_content( $localname [, $resource_directory] [, %options ] ) >>
1789
1790
$mech->get('http://google.com');
1791
$mech->save_content('google search page','google search page files');
1792
1793
Saves the given URL to the given filename. The URL will be
1794
fetched from the cache if possible, avoiding unnecessary network
1795
traffic.
1796
1797
If C<$resource_directory> is given, the whole page will be saved.
1798
All CSS, subframes and images
1799
will be saved into that directory, while the page HTML itself will
1800
still be saved in the file pointed to by C<$localname>.
1801
1802
Returns a C object through which you can cancel the
1803
download by calling its C<< ->cancelSave >> method. Also, you can poll
1804
the download status through the C<< ->{currentState} >> property.
1805
1806
If you need to set persist flags pass the unsigned long value in the
1807
C option.
1808
1809
$mech->get('http://zombisoft.com');
1810
$mech->save_content('Zombisoft','zombisoft-resource-files', "persist" => 512 | 2048);
1811
1812
A list of flags and their values can be found at
1813
L.
1814
1815
If you are interested in the intermediate download progress, create
1816
a ProgressListener through C<< $mech->progress_listener >>
1817
and pass it in the C option.
1818
1819
The download will
1820
continue in the background. It will not show up in the
1821
Download Manager.
1822
1823
=cut
1824
1825
sub save_content {
1826
0
0
1
my ($self,$localname,$resource_directory,%options) = @_;
1827
1828
0
$localname = File::Spec->rel2abs($localname, '.');
1829
# Touch the file
1830
0
0
if (! -f $localname) {
1831
0
0
open my $fh, '>', $localname
1832
or die "Couldn't create '$localname': $!";
1833
};
1834
1835
0
0
if ($resource_directory) {
1836
0
$resource_directory = File::Spec->rel2abs($resource_directory, '.');
1837
1838
# Create the directory
1839
0
0
if (! -d $resource_directory) {
1840
0
0
mkdir $resource_directory
1841
or die "Couldn't create '$resource_directory': $!";
1842
};
1843
};
1844
1845
0
my $transfer_file = $self->repl->declare(<<'JS');
1846
function (document,filetarget,rscdir,progress,persistflags) {
1847
//new file object
1848
var obj_target;
1849
if (filetarget) {
1850
obj_target = Components.classes["@mozilla.org/file/local;1"]
1851
.createInstance(Components.interfaces.nsILocalFile);
1852
};
1853
1854
//set file with path
1855
obj_target.initWithPath(filetarget);
1856
1857
var obj_rscdir;
1858
if (rscdir) {
1859
obj_rscdir = Components.classes["@mozilla.org/file/local;1"]
1860
.createInstance(Components.interfaces.nsILocalFile);
1861
obj_rscdir.initWithPath(rscdir);
1862
};
1863
1864
var obj_Persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
1865
.createInstance(Components.interfaces.nsIWebBrowserPersist);
1866
1867
// with persist flags if desired
1868
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
1869
const flags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES;
1870
obj_Persist.persistFlags = flags | nsIWBP.PERSIST_FLAGS_FROM_CACHE
1871
| nsIWBP["PERSIST_FLAGS_FORCE_ALLOW_COOKIES"]
1872
| persistflags
1873
;
1874
1875
obj_Persist.progressListener = progress;
1876
1877
//save file to target
1878
obj_Persist.saveDocument(document,obj_target, obj_rscdir, null,0,0);
1879
return obj_Persist
1880
};
1881
JS
1882
#warn "=> $localname / $resource_directory";
1883
0
$transfer_file->(
1884
$self->document,
1885
$localname,
1886
$resource_directory,
1887
$options{progress},
1888
$options{persist}
1889
);
1890
}
1891
1892
=head2 C<< $mech->save_url( $url, $localname, [%options] ) >>
1893
1894
$mech->save_url('http://google.com','google_index.html');
1895
1896
Saves the given URL to the given filename. The URL will be
1897
fetched from the cache if possible, avoiding unnecessary network
1898
traffic.
1899
1900
If you are interested in the intermediate download progress, create
1901
a ProgressListener through C<< $mech->progress_listener >>
1902
and pass it in the C option.
1903
The download will
1904
continue in the background. It will also not show up in the
1905
Download Manager.
1906
1907
If the C option is not passed in, C< ->save_url >
1908
will only return after the download has finished.
1909
1910
Returns a C object through which you can cancel the
1911
download by calling its C<< ->cancelSave >> method. Also, you can poll
1912
the download status through the C<< ->{currentState} >> property.
1913
1914
=cut
1915
1916
sub save_url {
1917
0
0
1
my ($self,$url,$localname,%options) = @_;
1918
1919
0
$localname = File::Spec->rel2abs($localname, '.');
1920
1921
0
0
if (! -f $localname) {
1922
0
0
open my $fh, '>', $localname
1923
or die "Couldn't create '$localname': $!";
1924
};
1925
1926
0
my $res;
1927
0
0
if( ! $options{ progress }) {
1928
0
$options{ wait } = 1;
1929
# We will do a synchronous download
1930
0
my $STATE_FINISHED = $self->repl->constant('Components.interfaces.nsIWebBrowserPersist.PERSIST_STATE_FINISHED');
1931
$options{ progress }= $self->make_progress_listener(onStateChange => sub {
1932
0
0
my ($webprogress,$request,$flags,$status) = @_;
1933
0
0
if( $res->{currentState} == $STATE_FINISHED) {
1934
0
$options{ wait }= 0;
1935
};
1936
},
1937
# onProgressChange => sub {
1938
# my ($aWebProgress, $aRequest, $aCurSelfProgress, $aMaxSelfProgress, $aCurTotalProgress, $aMaxTotalProgress)= @_;
1939
#diag sprintf "%03.2f", $aCurTotalProgress / ($aMaxTotalProgress||1) * 100;
1940
#}
1941
0
);
1942
};
1943
1944
0
my $transfer_file = $self->repl->declare(<<'JS');
1945
function (source,filetarget,progress,tab) {
1946
//new obj_URI object
1947
var obj_URI = Components.classes["@mozilla.org/network/io-service;1"]
1948
.getService(Components.interfaces.nsIIOService).newURI(source, null, null);
1949
1950
//new file object
1951
var obj_target;
1952
if (filetarget) {
1953
obj_target = Components.classes["@mozilla.org/file/local;1"]
1954
.createInstance(Components.interfaces.nsILocalFile);
1955
};
1956
1957
//set file with path
1958
obj_target.initWithPath(filetarget);
1959
1960
//new persistence object
1961
var obj_Persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
1962
.createInstance(Components.interfaces.nsIWebBrowserPersist);
1963
1964
// with persist flags if desired
1965
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
1966
const flags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES;
1967
// Also make it send the proper cookies
1968
// If we are on a 3.0 Firefox, PERSIST_FLAGS_FORCE_ALLOW_COOKIES does
1969
// not exist, so we need to get creative:
1970
1971
obj_Persist.persistFlags = flags | nsIWBP.PERSIST_FLAGS_FROM_CACHE
1972
| nsIWBP["PERSIST_FLAGS_FORCE_ALLOW_COOKIES"]
1973
;
1974
obj_Persist.progressListener = progress;
1975
/* {
1976
"onStateChange": function() {
1977
var myargs= Array.slice(arguments);
1978
alert("onStateChange (" + myargs.join(",")+")");
1979
try {
1980
progress.onStateChange.apply(null,arguments);
1981
} catch(e) {
1982
alert(e.message);
1983
};
1984
},
1985
"onProgressChange": function() {
1986
var myargs= Array.slice(arguments);
1987
alert("onProgressChange (" + myargs.join(",")+")");
1988
try {
1989
progress.onProgressChange.apply(null,arguments);
1990
} catch(e) {
1991
alert(e.message);
1992
};
1993
}
1994
};
1995
*/
1996
1997
// Since Firefox 18, we need to provide a proper privacyContext
1998
// This is cobbled together from half-documented parts in various places
1999
// of the Mozilla documentation. The changes file does not list the
2000
// necessary steps :-(
2001
// https://developer.mozilla.org/en-US/docs/Supporting_per-window_private_browsing
2002
// The documentation is even wrong. It recommends to import("chrome://gre/modules/PrivateBrowsingUtils.jsm")
2003
// but the correct URL is "resource://gre/modules/PrivateBrowsingUtils.jsm".
2004
// Also, the method is not named "getPrivacyContextFromWindow" but "privacyContextFromWindow".
2005
var privacyContext;
2006
var version = Components.classes["@mozilla.org/xre/app-info;1"]
2007
.getService(Components.interfaces.nsIXULAppInfo).version;
2008
if( version >= 18.0 ) {
2009
Components.utils.import("resource://gre/modules/PrivateBrowsingUtils.jsm");
2010
privacyContext = PrivateBrowsingUtils.privacyContextFromWindow(tab.linkedBrowser.contentDocument.defaultView);
2011
};
2012
2013
//save file to target
2014
obj_Persist.saveURI(obj_URI,null,null,null,null,obj_target,privacyContext);
2015
return obj_Persist
2016
};
2017
JS
2018
0
$res= $transfer_file->("$url" => $localname, $options{progress}, $self->tab);
2019
0
while( $options{ wait }) {
2020
0
$self->repl->poll;
2021
sleep 1
2022
0
0
if $options{ wait };
2023
};
2024
0
$res
2025
}
2026
2027
=head2 C<< $mech->base() >>
2028
2029
print $mech->base;
2030
2031
Returns the URL base for the current page.
2032
2033
The base is either specified through a C
2034
tag or is the current URL.
2035
2036
This method is specific to WWW::Mechanize::Firefox
2037
2038
=cut
2039
2040
sub base {
2041
0
0
1
my ($self) = @_;
2042
0
(my $base) = $self->selector('base');
2043
0
0
$base = $base->{href}
2044
if $base;
2045
0
0
$base ||= $self->uri;
2046
};
2047
2048
=head2 C<< $mech->content_type() >>
2049
2050
=head2 C<< $mech->ct() >>
2051
2052
print $mech->content_type;
2053
2054
Returns the content type of the currently loaded document
2055
2056
=cut
2057
2058
sub content_type {
2059
0
0
1
my ($self) = @_;
2060
0
return $self->document->{contentType};
2061
};
2062
2063
*ct = \&content_type;
2064
2065
=head2 C<< $mech->is_html() >>
2066
2067
print $mech->is_html();
2068
2069
Returns true/false on whether our content is HTML, according to the
2070
HTTP headers.
2071
2072
=cut
2073
2074
sub is_html {
2075
0
0
1
my $self = shift;
2076
0
0
return defined $self->ct && ($self->ct eq 'text/html');
2077
}
2078
2079
=head2 C<< $mech->title() >>
2080
2081
print "We are on page " . $mech->title;
2082
2083
Returns the current document title.
2084
2085
=cut
2086
2087
sub title {
2088
0
0
1
my ($self) = @_;
2089
0
return $self->document->{title};
2090
};
2091
2092
=head1 EXTRACTION METHODS
2093
2094
=head2 C<< $mech->links() >>
2095
2096
print $_->text . " -> " . $_->url . "\n"
2097
for $mech->links;
2098
2099
Returns all links in the document as L objects.
2100
2101
Currently accepts no parameters. See C<< ->xpath >>
2102
or C<< ->selector >> when you want more control.
2103
2104
=cut
2105
2106
%link_spec = (
2107
a => { url => 'href', },
2108
area => { url => 'href', },
2109
frame => { url => 'src', },
2110
iframe => { url => 'src', },
2111
link => { url => 'href', },
2112
meta => { url => 'content', xpath => (join '',
2113
q{translate(@http-equiv,'ABCDEFGHIJKLMNOPQRSTUVWXYZ',},
2114
q{'abcdefghijklmnopqrstuvwxyz')="refresh"}), },
2115
);
2116
2117
# taken from WWW::Mechanize. This should possibly just be reused there
2118
sub make_link {
2119
0
0
0
my ($self,$node,$base) = @_;
2120
0
my $tag = lc $node->{tagName};
2121
2122
0
0
if (! exists $link_spec{ $tag }) {
2123
0
warn "Unknown tag '$tag'";
2124
};
2125
0
my $url = $node->{ $link_spec{ $tag }->{url} };
2126
2127
0
0
if ($tag eq 'meta') {
2128
0
my $content = $url;
2129
0
0
if ( $content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i ) {
2130
0
$url = $1;
2131
0
0
$url =~ s/^"(.+)"$/$1/ or $url =~ s/^'(.+)'$/$1/;
2132
}
2133
else {
2134
0
undef $url;
2135
}
2136
};
2137
2138
0
0
if (defined $url) {
2139
0
my $res = WWW::Mechanize::Link->new({
2140
tag => $tag,
2141
name => $node->{name},
2142
base => $base,
2143
url => $url,
2144
text => $node->{innerHTML},
2145
attrs => {},
2146
});
2147
2148
0
$res
2149
} else {
2150
()
2151
0
};
2152
}
2153
2154
sub links {
2155
0
0
1
my ($self) = @_;
2156
0
my @links = $self->selector( join ",", sort keys %link_spec);
2157
0
my $base = $self->base;
2158
0
return map {
2159
0
$self->make_link($_,$base)
2160
} @links;
2161
};
2162
2163
# Call croak or carp, depending on the C< autodie > setting
2164
sub signal_condition {
2165
0
0
0
my ($self,$msg) = @_;
2166
0
0
if ($self->{autodie}) {
2167
0
croak $msg
2168
} else {
2169
0
carp $msg
2170
}
2171
};
2172
2173
# Call croak on the C< autodie > setting if we have a non-200 status
2174
sub signal_http_status {
2175
0
0
0
my ($self) = @_;
2176
0
0
if ($self->{autodie}) {
2177
0
0
if ($self->status !~ /^2/) {
2178
# there was an error
2179
0
0
croak ($self->response(headers => 0)->message || sprintf "Got status code %d", $self->status );
2180
};
2181
} else {
2182
# silent
2183
}
2184
};
2185
2186
=head2 C<< $mech->find_link_dom( %options ) >>
2187
2188
print $_->{innerHTML} . "\n"
2189
for $mech->find_link_dom( text_contains => 'CPAN' );
2190
2191
A method to find links, like L's
2192
C<< ->find_links >> method. This method returns DOM objects from
2193
Firefox instead of WWW::Mechanize::Link objects.
2194
2195
Note that Firefox
2196
might have reordered the links or frame links in the document
2197
so the absolute numbers passed via C
2198
might not be the same between
2199
L and L.
2200
2201
Returns the DOM object as L::Instance.
2202
2203
The supported options are:
2204
2205
=over 4
2206
2207
=item *
2208
2209
C<< text >> and C<< text_contains >> and C<< text_regex >>
2210
2211
Match the text of the link as a complete string, substring or regular expression.
2212
2213
Matching as a complete string or substring is a bit faster, as it is
2214
done in the XPath engine of Firefox.
2215
2216
=item *
2217
2218
C<< id >> and C<< id_contains >> and C<< id_regex >>
2219
2220
Matches the C attribute of the link completely or as part
2221
2222
=item *
2223
2224
C<< name >> and C<< name_contains >> and C<< name_regex >>
2225
2226
Matches the C attribute of the link
2227
2228
=item *
2229
2230
C<< url >> and C<< url_regex >>
2231
2232
Matches the URL attribute of the link (C, C or C).
2233
2234
=item *
2235
2236
C<< class >> - the C attribute of the link
2237
2238
=item *
2239
2240
C<< n >> - the (1-based) index. Defaults to returning the first link.
2241
2242
=item *
2243
2244
C<< single >> - If true, ensure that only one element is found. Otherwise croak
2245
or carp, depending on the C parameter.
2246
2247
=item *
2248
2249
C<< one >> - If true, ensure that at least one element is found. Otherwise croak
2250
or carp, depending on the C parameter.
2251
2252
The method Cs if no link is found. If the C option is true,
2253
it also Cs when more than one link is found.
2254
2255
=back
2256
2257
=cut
2258
2259
80
80
522
use vars '%xpath_quote';
80
111
80
388034
2260
%xpath_quote = (
2261
'"' => '\"',
2262
#"'" => "\\'",
2263
#'[' => '[',
2264
#']' => ']',
2265
#'[' => '[\[]',
2266
#'[' => '\[',
2267
#']' => '[\]]',
2268
);
2269
2270
# Return the default limiter if no other limiting option is set:
2271
sub _default_limiter {
2272
0
0
my ($default, $options) = @_;
2273
0
0
if (! grep { exists $options->{ $_ } } qw(single one maybe all any)) {
0
2274
0
$options->{ $default } = 1;
2275
};
2276
return ()
2277
0
};
2278
2279
sub quote_xpath($) {
2280
0
0
0
local $_ = $_[0];
2281
0
0
s/(['"\[\]])/$xpath_quote{$1} || $1/ge;
0
2282
0
$_
2283
};
2284
2285
#sub perl_regex_to_xpath($) {
2286
# my ($re) = @_;
2287
# my $flags = '';
2288
# warn $re;
2289
# $re =~ s!^\(\?([a-z]*)\-[a-z]*:(.*)\)$!$2!
2290
# and $flags = $1;
2291
# warn qq{=> XPATH: "$re" , "$flags"};
2292
# ($re, $flags)
2293
#};
2294
2295
sub find_link_dom {
2296
0
0
1
my ($self,%opts) = @_;
2297
0
my %xpath_options;
2298
2299
0
for (qw(node document frames)) {
2300
# Copy over XPath options that were passed in
2301
0
0
if (exists $opts{ $_ }) {
2302
0
$xpath_options{ $_ } = delete $opts{ $_ };
2303
};
2304
};
2305
2306
0
my $single = delete $opts{ single };
2307
0
0
my $one = delete $opts{ one } || $single;
2308
0
0
0
if ($single and exists $opts{ n }) {
2309
0
croak "It doesn't make sense to use 'single' and 'n' option together"
2310
};
2311
0
0
my $n = (delete $opts{ n } || 1);
2312
0
0
$n--
2313
if ($n ne 'all'); # 1-based indexing
2314
0
my @spec;
2315
2316
# Decode text and text_contains into XPath
2317
0
for my $lvalue (qw( text id name class )) {
2318
0
my %lefthand = (
2319
text => 'text()',
2320
);
2321
0
my %match_op = (
2322
'' => q{%s="%s"},
2323
'contains' => q{contains(%s,"%s")},
2324
# Ideally we would also handle *_regex here, but Firefox XPath
2325
# does not support fn:matches() :-(
2326
#'regex' => q{matches(%s,"%s","%s")},
2327
);
2328
0
0
my $lhs = $lefthand{ $lvalue } || '@'.$lvalue;
2329
0
for my $op (keys %match_op) {
2330
0
my $v = $match_op{ $op };
2331
0
0
$op = '_'.$op if length($op);
2332
0
my $key = "${lvalue}$op";
2333
2334
0
0
if (exists $opts{ $key }) {
2335
0
my $p = delete $opts{ $key };
2336
0
push @spec, sprintf $v, $lhs, $p;
2337
};
2338
};
2339
};
2340
2341
0
0
if (my $p = delete $opts{ url }) {
2342
0
push @spec, sprintf '@href = "%s" or @src="%s"', quote_xpath $p, quote_xpath $p;
2343
}
2344
0
my @tags = (sort keys %link_spec);
2345
0
0
if (my $p = delete $opts{ tag }) {
2346
0
@tags = $p;
2347
};
2348
0
0
if (my $p = delete $opts{ tag_regex }) {
2349
0
@tags = grep /$p/, @tags;
2350
};
2351
2352
0
my $q = join '|',
2353
map {
2354
0
my @full = map {qq{($_)}} grep {defined} (@spec, $link_spec{$_}->{xpath});
0
0
2355
0
0
if (@full) {
2356
0
sprintf "//%s[%s]", $_, join " and ", @full;
2357
} else {
2358
0
sprintf "//%s", $_
2359
};
2360
} (@tags);
2361
#warn $q;
2362
2363
0
my @res = $self->xpath($q, %xpath_options );
2364
2365
0
0
if (keys %opts) {
2366
# post-filter the remaining links through WWW::Mechanize
2367
# for all the options we don't support with XPath
2368
2369
0
my $base = $self->base;
2370
0
require WWW::Mechanize;
2371
0
@res = grep {
2372
0
WWW::Mechanize::_match_any_link_parms($self->make_link($_,$base),\%opts)
2373
} @res;
2374
};
2375
2376
0
0
if ($one) {
2377
0
0
if (0 == @res) { $self->signal_condition( "No link found matching '$q'" )};
0
2378
0
0
if ($single) {
2379
0
0
if (1 < @res) {
2380
0
$self->highlight_node(@res);
2381
0
$self->signal_condition(
2382
sprintf "%d elements found found matching '%s'", scalar @res, $q
2383
);
2384
};
2385
};
2386
};
2387
2388
0
0
if ($n eq 'all') {
2389
return @res
2390
0
};
2391
0
$res[$n]
2392
}
2393
2394
=head2 C<< $mech->find_link( %options ) >>
2395
2396
print $_->text . "\n"
2397
for $mech->find_link( text_contains => 'CPAN' );
2398
2399
A method quite similar to L's method.
2400
The options are documented in C<< ->find_link_dom >>.
2401
2402
Returns a L object.
2403
2404
This defaults to not look through child frames.
2405
2406
=cut
2407
2408
sub find_link {
2409
0
0
1
my ($self,%opts) = @_;
2410
0
my $base = $self->base;
2411
0
0
0
croak "Option 'all' not available for ->find_link. Did you mean to call ->find_all_links()?"
2412
if 'all' eq ($opts{n} || '');
2413
0
0
if (my $link = $self->find_link_dom(frames => 0, %opts)) {
2414
0
return $self->make_link($link, $base)
2415
} else {
2416
return
2417
0
};
2418
};
2419
2420
=head2 C<< $mech->find_all_links( %options ) >>
2421
2422
print $_->text . "\n"
2423
for $mech->find_all_links( text_regex => qr/google/i );
2424
2425
Finds all links in the document.
2426
The options are documented in C<< ->find_link_dom >>.
2427
2428
Returns them as list or an array reference, depending
2429
on context.
2430
2431
This defaults to not look through child frames.
2432
2433
=cut
2434
2435
sub find_all_links {
2436
0
0
1
my ($self, %opts) = @_;
2437
0
$opts{ n } = 'all';
2438
0
my $base = $self->base;
2439
0
my @matches = map {
2440
0
$self->make_link($_, $base);
2441
} $self->find_all_links_dom( frames => 0, %opts );
2442
0
0
return @matches if wantarray;
2443
0
return \@matches;
2444
};
2445
2446
=head2 C<< $mech->find_all_links_dom %options >>
2447
2448
print $_->{innerHTML} . "\n"
2449
for $mech->find_all_links_dom( text_regex => qr/google/i );
2450
2451
Finds all matching linky DOM nodes in the document.
2452
The options are documented in C<< ->find_link_dom >>.
2453
2454
Returns them as list or an array reference, depending
2455
on context.
2456
2457
This defaults to not look through child frames.
2458
2459
=cut
2460
2461
sub find_all_links_dom {
2462
0
0
1
my ($self,%opts) = @_;
2463
0
$opts{ n } = 'all';
2464
0
my @matches = $self->find_link_dom( frames => 0, %opts );
2465
0
0
return @matches if wantarray;
2466
0
return \@matches;
2467
};
2468
2469
=head2 C<< $mech->follow_link( $link ) >>
2470
2471
=head2 C<< $mech->follow_link( %options ) >>
2472
2473
$mech->follow_link( xpath => '//a[text() = "Click here!"]' );
2474
2475
Follows the given link. Takes the same parameters that C
2476
uses. In addition, C can be passed to (not) force
2477
waiting for a new page to be loaded.
2478
2479
Note that C<< ->follow_link >> will only try to follow link-like
2480
things like C tags.
2481
2482
=cut
2483
2484
sub follow_link {
2485
0
0
1
my ($self,$link,%opts);
2486
0
0
if (@_ == 2) { # assume only a link parameter
2487
0
($self,$link) = @_;
2488
0
$self->click($link);
2489
} else {
2490
0
($self,%opts) = @_;
2491
0
_default_limiter( one => \%opts );
2492
0
$link = $self->find_link_dom(%opts);
2493
0
$self->click({ dom => $link, %opts });
2494
}
2495
}
2496
2497
=head2 C<< $mech->xpath( $query, %options ) >>
2498
2499
my $link = $mech->xpath('//a[id="clickme"]', one => 1);
2500
# croaks if there is no link or more than one link found
2501
2502
my @para = $mech->xpath('//p');
2503
# Collects all paragraphs
2504
2505
my @para_text = $mech->xpath('//p/text()', type => $mech->xpathResult('STRING_TYPE'));
2506
# Collects all paragraphs as text
2507
2508
Runs an XPath query in Firefox against the current document.
2509
2510
If you need more information about the returned results,
2511
use the C<< ->xpathEx() >> function.
2512
2513
The options allow the following keys:
2514
2515
=over 4
2516
2517
=item *
2518
2519
C<< document >> - document in which the query is to be executed. Use this to
2520
search a node within a specific subframe of C<< $mech->document >>.
2521
2522
=item *
2523
2524
C<< frames >> - if true, search all documents in all frames and iframes.
2525
This may or may not conflict with C. This will default to the
2526
C setting of the WWW::Mechanize::Firefox object.
2527
2528
=item *
2529
2530
C<< node >> - node relative to which the query is to be executed. Note
2531
that you will have to use a relative XPath expression as well. Use
2532
2533
.//foo
2534
2535
instead of
2536
2537
//foo
2538
2539
=item *
2540
2541
C<< single >> - If true, ensure that only one element is found. Otherwise croak
2542
or carp, depending on the C parameter.
2543
2544
=item *
2545
2546
C<< one >> - If true, ensure that at least one element is found. Otherwise croak
2547
or carp, depending on the C parameter.
2548
2549
=item *
2550
2551
C<< maybe >> - If true, ensure that at most one element is found. Otherwise
2552
croak or carp, depending on the C parameter.
2553
2554
=item *
2555
2556
C<< all >> - If true, return all elements found. This is the default.
2557
You can use this option if you want to use C<< ->xpath >> in scalar context
2558
to count the number of matched elements, as it will otherwise emit a warning
2559
for each usage in scalar context without any of the above restricting options.
2560
2561
=item *
2562
2563
C<< any >> - no error is raised, no matter if an item is found or not.
2564
2565
=item *
2566
2567
C<< type >> - force the return type of the query.
2568
2569
type => $mech->xpathResult('ORDERED_NODE_SNAPSHOT_TYPE'),
2570
2571
WWW::Mechanize::Firefox tries a best effort in giving you the appropriate
2572
result of your query, be it a DOM node or a string or a number. In the case
2573
you need to restrict the return type, you can pass this in.
2574
2575
The allowed strings are documented in the MDN. Interesting types are
2576
2577
ANY_TYPE (default, uses whatever things the query returns)
2578
STRING_TYPE
2579
NUMBER_TYPE
2580
ORDERED_NODE_SNAPSHOT_TYPE
2581
2582
=back
2583
2584
Returns the matched results.
2585
2586
You can pass in a list of queries as an array reference for the first parameter.
2587
The result will then be the list of all elements matching any of the queries.
2588
2589
This is a method that is not implemented in WWW::Mechanize.
2590
2591
In the long run, this should go into a general plugin for
2592
L.
2593
2594
=cut
2595
2596
sub xpath {
2597
0
0
1
my ($self,$query,%options) = @_;
2598
2599
0
my $single = $options{ single };
2600
0
my $first = $options{ one };
2601
0
my $maybe = $options{ maybe };
2602
0
my $any = $options{ any };
2603
0
0
my $return_first_element = ($single or $first or $maybe or $any );
2604
2605
# Construct some helper variables
2606
0
0
my $zero_allowed = not ($single or $first);
2607
0
0
my $two_allowed = not( $single or $maybe);
2608
2609
# Sanity check for the common error of
2610
# my $item = $mech->xpath("//foo");
2611
0
0
0
if (! exists $options{ all } and not ($return_first_element)) {
2612
0
0
0
$self->signal_condition(join "\n",
2613
"You asked for many elements but seem to only want a single item.",
2614
"Did you forget to pass the 'single' option with a true value?",
2615
"Pass 'all => 1' to suppress this message and receive the count of items.",
2616
) if defined wantarray and !wantarray;
2617
};
2618
2619
# How can we return here a set of strings
2620
# if we don't return an array in .result?!
2621
my @res= map {
2622
0
!defined $_->{resultType}
2623
? ()
2624
: $_->{ resultType } == $self->{ XpathResultTypes }->{ORDERED_NODE_SNAPSHOT_TYPE }
2625
|| $_->{ resultType } == $self->{ XpathResultTypes }->{UNORDERED_NODE_SNAPSHOT_TYPE }
2626
|| $_->{ resultType } == $self->{ XpathResultTypes }->{ORDERED_NODE_ITERATOR_TYPE }
2627
|| $_->{ resultType } == $self->{ XpathResultTypes }->{UNORDERED_NODE_ITERATOR_TYPE }
2628
0
? @{ $_->{result} }
2629
: $_->{ result }
2630
0
0
0
} $self->xpathEx(
0
2631
$query,
2632
#type => $self->{XpathResultTypes}->{ORDERED_NODE_SNAPSHOT_TYPE},
2633
type => $self->{XpathResultTypes}->{ANY_TYPE},
2634
return_first => $return_first_element,
2635
%options
2636
);
2637
2638
0
0
0
if (! $zero_allowed and @res == 0) {
2639
0
0
$options{ user_info } ||= $query;
2640
0
$self->signal_condition( "No elements found for $options{ user_info }" );
2641
};
2642
2643
0
0
0
if (! $two_allowed and @res > 1) {
2644
0
0
$options{ user_info } ||= $query;
2645
0
$self->highlight_node(@res);
2646
0
$self->signal_condition( (scalar @res) . " elements found for $options{ user_info }" );
2647
};
2648
2649
0
0
$return_first_element ? $res[0] : @res
2650
};
2651
2652
sub _initXpathResultTypes {
2653
0
0
my( $self )= @_;
2654
0
0
$self->{XpathResultTypes} ||= {
2655
ANY_TYPE => $self->repl->constant('XPathResult.ANY_TYPE'),
2656
NUMBER_TYPE => $self->repl->constant('XPathResult.NUMBER_TYPE'),
2657
STRING_TYPE => $self->repl->constant('XPathResult.STRING_TYPE'),
2658
BOOLEAN_TYPE => $self->repl->constant('XPathResult.BOOLEAN_TYPE'),
2659
UNORDERED_NODE_ITERATOR_TYPE => $self->repl->constant('XPathResult.UNORDERED_NODE_ITERATOR_TYPE'),
2660
ORDERED_NODE_ITERATOR_TYPE => $self->repl->constant('XPathResult.ORDERED_NODE_ITERATOR_TYPE'),
2661
UNORDERED_NODE_SNAPSHOT_TYPE => $self->repl->constant('XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE'),
2662
ORDERED_NODE_SNAPSHOT_TYPE => $self->repl->constant('XPathResult.ORDERED_NODE_SNAPSHOT_TYPE'),
2663
ANY_UNORDERED_TYPE => $self->repl->constant('XPathResult.ANY_UNORDERED_NODE_TYPE'),
2664
FIRST_ORDERED_NODE_TYPE => $self->repl->constant('XPathResult.FIRST_ORDERED_NODE_TYPE'),
2665
};
2666
2667
0
$self->{XpathResultTypenames} = +{ reverse %{ $self->{XpathResultTypes} } };
0
2668
};
2669
2670
0
0
0
sub xpathResultType { $_[0]->{ XpathResultTypenames }->{ $_[1] } };
2671
0
0
0
sub xpathResult { $_[0]->{XpathResultTypes}{$_[1]}; }
2672
2673
=head2 C<< $mech->xpathEx( $query, %options ) >>
2674
2675
my @links = $mech->xpathEx('//a[id="clickme"]');
2676
2677
Runs an XPath query in Firefox against a document. Returns a list
2678
of found elements. Each element in the result has the following properties:
2679
2680
=over 4
2681
2682
=item *
2683
2684
C<< resultType >> - the type of the result. The numerical value of C<< $mech->xpathResult() >>.
2685
2686
=item *
2687
2688
C<< resultSize >> - the number of elements in this result. This is 1 for atomic results like
2689
strings or numbers, and the number of elements for nodesets.
2690
2691
=item *
2692
2693
C<< result >> - the best result available. This is the nodeset
2694
or the text or number, depending on the query.
2695
2696
=back
2697
2698
=cut
2699
2700
sub xpathEx {
2701
# Returns verbose information about how things matched
2702
0
0
1
my ($self, $query, %options) = @_;
2703
2704
0
0
0
if ('ARRAY' ne (ref $query||'')) {
2705
0
$query = [$query];
2706
};
2707
2708
0
0
if ($options{ node }) {
2709
0
0
$options{ document } ||= $options{ node }->{ownerDocument};
2710
#warn "Have node, searching below node";
2711
} else {
2712
0
0
$options{ document } ||= $self->document;
2713
#warn "Searching below given document";
2714
#$options{node} = $options{document};
2715
};
2716
2717
0
0
$options{type} ||= $self->{XpathResult}->{ANY_TYPE};
2718
2719
0
0
$options{ user_info } ||= join " or ", map {qq{'$_'}} @$query;
0
2720
2721
# Sanity check for the common error of
2722
# my $item = $mech->xpathEx("//foo");
2723
0
0
if (! wantarray) {
2724
0
$self->signal_condition(join "\n",
2725
"->xpathEx needs to be called in list context.",
2726
);
2727
};
2728
2729
0
0
if (not exists $options{ frames }) {
2730
0
$options{frames} = $self->{frames};
2731
};
2732
2733
0
my $query_xpath = $self->repl->declare(<<'JS');
2734
function(doc, q, ref, type) {
2735
var xpr = doc.evaluate(q, ref, null, type, null);
2736
var r = { resultType: xpr.resultType, resultSize: 0, result: null };
2737
switch(xpr.resultType) {
2738
case XPathResult.NUMBER_TYPE:
2739
r.result= r.numberValue = xpr.numberValue;
2740
r.resultSize= 1;
2741
break;
2742
case XPathResult.STRING_TYPE:
2743
r.result= r.stringValue = xpr.stringValue;
2744
r.resultSize= 1;
2745
break;
2746
case XPathResult.BOOLEAN_TYPE:
2747
r.result= r.booleanValue = xpr.booleanValue;
2748
r.resultSize= 1;
2749
break;
2750
case XPathResult.UNORDERED_NODE_ITERATOR_TYPE:
2751
case XPathResult.ORDERED_NODE_ITERATOR_TYPE:
2752
r.result= r.nodeSet = [];
2753
var n;
2754
while (n = xpr.iterateNext()) {
2755
r.nodeSet.push(n);
2756
r.resultSize++;
2757
}
2758
break;
2759
case XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE:
2760
case XPathResult.ORDERED_NODE_SNAPSHOT_TYPE:
2761
r.result= r.nodeSet = [];
2762
r.resultSize= xpr.snapshotLength;
2763
for (var i = 0 ; i < xpr.snapshotLength; i++ ) {
2764
r.nodeSet[i] = xpr.snapshotItem(i);
2765
}
2766
break;
2767
case XPathResult.ANY_UNORDERED_NODE_TYPE:
2768
case XPathResult.FIRST_ORDERED_NODE_TYPE:
2769
r.result= r.singleNodeValue = xpr.singleNodeValue;
2770
r.resultSize= 1;
2771
break;
2772
default:
2773
break;
2774
}
2775
return r;
2776
}
2777
JS
2778
2779
0
my @res;
2780
2781
DOCUMENTS: {
2782
0
my @documents = $options{ document };
0
2783
#warn "Invalid root document" unless $options{ document };
2784
2785
# recursively join the results of sub(i)frames if wanted
2786
# This should maybe go into the loop to expand every frame as we descend
2787
# into the available subframes
2788
2789
0
while (@documents) {
2790
0
my $doc = shift @documents;
2791
#warn "Invalid document" unless $doc;
2792
2793
0
0
my $n = $options{ node } || $doc;
2794
#warn ">Searching @$query in $doc->{title}";
2795
# Munge the multiple @$queries into one:
2796
0
my $q = join "|", @$query;
2797
#warn $q;
2798
0
my @found = $query_xpath->($doc, $q, $n, $options{type});
2799
0
push @res, @found;
2800
2801
# A small optimization to return if we already have enough elements
2802
# We can't do this on $return_first as there might be more elements
2803
0
0
0
if( @res and $options{ return_first } and grep { $_->{resultSize} } @res ) {
0
0
2804
0
@res= grep { $_->{resultSize} } @res;
0
2805
0
last DOCUMENTS;
2806
};
2807
2808
0
0
0
if ($options{ frames } and not $options{ node }) {
2809
#warn ">Expanding below " . $doc->{title};
2810
#local $nesting .= "--";
2811
0
my @d = $self->expand_frames( $options{ frames }, $doc );
2812
#warn "Found $_->{title}" for @d;
2813
0
push @documents, @d;
2814
};
2815
};
2816
};
2817
2818
@res
2819
0
}
2820
2821
=head2 C<< $mech->selector( $css_selector, %options ) >>
2822
2823
my @text = $mech->selector('p.content');
2824
2825
Returns all nodes matching the given CSS selector. If
2826
C<$css_selector> is an array reference, it returns
2827
all nodes matched by any of the CSS selectors in the array.
2828
2829
This takes the same options that C<< ->xpath >> does.
2830
2831
In the long run, this should go into a general plugin for
2832
L.
2833
2834
=cut
2835
2836
sub selector {
2837
0
0
1
my ($self,$query,%options) = @_;
2838
0
0
$options{ user_info } ||= "CSS selector '$query'";
2839
0
0
0
if ('ARRAY' ne (ref $query || '')) {
2840
0
$query = [$query];
2841
};
2842
0
0
my $root = $options{ node } ? './' : '';
2843
0
my @q = map { selector_to_xpath($_, root => $root) } @$query;
0
2844
0
$self->xpath(\@q, %options);
2845
};
2846
2847
=head2 C<< $mech->by_id( $id, %options ) >>
2848
2849
my @text = $mech->by_id('_foo:bar');
2850
2851
Returns all nodes matching the given ids. If
2852
C<$id> is an array reference, it returns
2853
all nodes matched by any of the ids in the array.
2854
2855
This method is equivalent to calling C<< ->xpath >> :
2856
2857
$self->xpath(qq{//*[\@id="$_"], %options)
2858
2859
It is convenient when your element ids get mistaken for
2860
CSS selectors.
2861
2862
=cut
2863
2864
sub by_id {
2865
0
0
1
my ($self,$query,%options) = @_;
2866
0
0
0
if ('ARRAY' ne (ref $query||'')) {
2867
0
$query = [$query];
2868
};
2869
0
$options{ user_info } ||= "id "
2870
0
0
. join(" or ", map {qq{'$_'}} @$query)
2871
. " found";
2872
0
$query = [map { qq{.//*[\@id="$_"]} } @$query];
0
2873
0
$self->xpath($query, %options)
2874
}
2875
2876
=head2 C<< $mech->click( $name [,$x ,$y] ) >>
2877
2878
$mech->click( 'go' );
2879
$mech->click({ xpath => '//button[@name="go"]' });
2880
2881
Has the effect of clicking a button (or other element) on the current form. The
2882
first argument is the C of the button to be clicked. The second and third
2883
arguments (optional) allow you to specify the (x,y) coordinates of the click.
2884
2885
If there is only one button on the form, C<< $mech->click() >> with
2886
no arguments simply clicks that one button.
2887
2888
If you pass in a hash reference instead of a name,
2889
the following keys are recognized:
2890
2891
=over 4
2892
2893
=item *
2894
2895
C - Find the element to click by the CSS selector
2896
2897
=item *
2898
2899
C - Find the element to click by the XPath query
2900
2901
=item *
2902
2903
C - Click on the passed DOM element
2904
2905
You can use this to click on arbitrary page elements. There is no convenient
2906
way to pass x/y co-ordinates with this method.
2907
2908
=item *
2909
2910
C - Click on the element with the given id
2911
2912
This is useful if your document ids contain characters that
2913
do look like CSS selectors. It is equivalent to
2914
2915
xpath => qq{//*[\@id="$id"]}
2916
2917
=item *
2918
2919
C - Synchronize the click (default is 1)
2920
2921
Synchronizing means that WWW::Mechanize::Firefox will wait until
2922
one of the events listed in C is fired. You want to switch
2923
it off when there will be no HTTP response or DOM event fired, for
2924
example for clicks that only modify the DOM.
2925
2926
You can pass in a scalar that is a false value to not wait for
2927
any kind of event.
2928
2929
Passing in an array reference will use the array elements as
2930
Javascript events to wait for.
2931
2932
Passing in any other true value will use the value of C<< ->events >>
2933
as the list of events to wait for.
2934
2935
=back
2936
2937
Returns a L object.
2938
2939
As a deviation from the WWW::Mechanize API, you can also pass a
2940
hash reference as the first parameter. In it, you can specify
2941
the parameters to search much like for the C calls.
2942
2943
Note: Currently, clicking on images with the C attribute
2944
does not trigger the move to the new URL. A workaround is to program
2945
the new URL into your script.
2946
2947
=cut
2948
2949
sub click {
2950
0
0
1
my ($self,$name,$x,$y) = @_;
2951
0
my %options;
2952
my @buttons;
2953
2954
0
0
0
if (! defined $name) {
0
0
0
2955
0
croak("->click called with undef link");
2956
} elsif (ref $name and blessed($name) and $name->can('__click')) {
2957
0
$options{ dom } = $name;
2958
} elsif (ref $name eq 'HASH') { # options
2959
0
%options = %$name;
2960
} else {
2961
0
$options{ name } = $name;
2962
};
2963
2964
0
0
if (exists $options{ name }) {
2965
0
0
$name = quotemeta($options{ name }|| '');
2966
$options{ xpath } = [
2967
0
sprintf( q{//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="button" and @name="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit" or @type="image") and @name="%s")]}, $name, $name),
2968
];
2969
0
0
if ($options{ name } eq '') {
2970
0
push @{ $options{ xpath }},
0
2971
q{//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" or translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input") and @type="button" or @type="submit" or @type="image"]},
2972
;
2973
};
2974
0
$options{ user_info } = "Button with name '$name'";
2975
};
2976
2977
0
0
if (! exists $options{ synchronize }) {
0
2978
0
$options{ synchronize } = $self->events;
2979
} elsif( ! ref $options{ synchronize }) {
2980
$options{ synchronize } = $options{ synchronize }
2981
0
0
? $self->events
2982
: [],
2983
};
2984
2985
0
0
if ($options{ dom }) {
2986
0
@buttons = $options{ dom };
2987
} else {
2988
0
@buttons = $self->_option_query(%options);
2989
};
2990
2991
$self->_sync_call(
2992
$options{ synchronize }, sub { # ,'abort'
2993
0
0
$buttons[0]->__click($x,$y);
2994
}
2995
0
);
2996
2997
0
0
if (defined wantarray) {
2998
0
return $self->response
2999
};
3000
}
3001
3002
=head2 C<< $mech->click_button( ... ) >>
3003
3004
$mech->click_button( name => 'go' );
3005
$mech->click_button( input => $mybutton );
3006
3007
Has the effect of clicking a button on the current form by specifying its
3008
name, value, or index. Its arguments are a list of key/value pairs. Only
3009
one of name, number, input or value must be specified in the keys.
3010
3011
=over 4
3012
3013
=item *
3014
3015
C - name of the button
3016
3017
=item *
3018
3019
C - value of the button
3020
3021
=item *
3022
3023
C - DOM node
3024
3025
=item *
3026
3027
C - id of the button
3028
3029
=item *
3030
3031
C - number of the button
3032
3033
=back
3034
3035
If you find yourself wanting to specify a button through its
3036
C or C, consider using C<< ->click >> instead.
3037
3038
=cut
3039
3040
sub click_button {
3041
0
0
1
my ($self,%options) = @_;
3042
0
my $node;
3043
my $xpath;
3044
0
my $user_message;
3045
0
0
if (exists $options{ input }) {
0
0
0
0
3046
0
$node = delete $options{ input };
3047
} elsif (exists $options{ name }) {
3048
0
my $v = delete $options{ name };
3049
0
$xpath = sprintf( '//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" and @name="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and @type="button" or @type="submit" and @name="%s")]', $v, $v);
3050
0
$user_message = "Button name '$v' unknown";
3051
} elsif (exists $options{ value }) {
3052
0
my $v = delete $options{ value };
3053
0
$xpath = sprintf( '//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" and @value="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit") and @value="%s")]', $v, $v);
3054
0
$user_message = "Button value '$v' unknown";
3055
} elsif (exists $options{ id }) {
3056
0
my $v = delete $options{ id };
3057
0
$xpath = sprintf '//*[@id="%s"]', $v;
3058
0
$user_message = "Button name '$v' unknown";
3059
} elsif (exists $options{ number }) {
3060
0
my $v = delete $options{ number };
3061
0
$xpath = sprintf '//*[translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "input" and @type="submit")][%s]', $v;
3062
0
$user_message = "Button number '$v' out of range";
3063
};
3064
#warn $xpath;
3065
0
0
$node ||= $self->xpath( $xpath,
3066
node => $self->current_form,
3067
single => 1,
3068
user_message => $user_message,
3069
);
3070
0
0
if ($node) {
3071
0
$self->click({ dom => $node, %options });
3072
} else {
3073
3074
0
$self->signal_condition($user_message);
3075
};
3076
3077
}
3078
3079
=head1 FORM METHODS
3080
3081
=head2 C<< $mech->current_form() >>
3082
3083
print $mech->current_form->{name};
3084
3085
Returns the current form.
3086
3087
This method is incompatible with L.
3088
It returns the DOM C<<
3089
a L instance.
3090
3091
Note that WWW::Mechanize::Firefox has little way to know
3092
that the current form is not displayed in the browser
3093
anymore, so it often holds on to the last value. If
3094
you want to make sure that a fresh or no form is used,
3095
remove it:
3096
3097
$mech->clear_current_form;
3098
3099
The current form will be reset by WWW::Mechanize::Firefox
3100
on calls to C<< ->get() >> and C<< ->get_local() >>,
3101
and on calls to C<< ->submit() >> and C<< ->submit_with_fields >>.
3102
3103
=cut
3104
3105
sub current_form {
3106
0
0
1
$_[0]->{current_form}
3107
};
3108
sub clear_current_form {
3109
0
0
0
undef $_[0]->{current_form};
3110
};
3111
3112
=head2 C<< $mech->form_name( $name [, %options] ) >>
3113
3114
$mech->form_name( 'search' );
3115
3116
Selects the current form by its name. The options
3117
are identical to those accepted by the L<< /$mech->xpath >> method.
3118
3119
=cut
3120
3121
sub form_name {
3122
0
0
1
my ($self,$name,%options) = @_;
3123
0
$name = quote_xpath $name;
3124
0
_default_limiter( single => \%options );
3125
0
$self->{current_form} = $self->selector("form[name='$name']",
3126
user_info => "form name '$name'",
3127
%options
3128
);
3129
};
3130
3131
=head2 C<< $mech->form_id( $id [, %options] ) >>
3132
3133
$mech->form_id( 'login' );
3134
3135
Selects the current form by its C attribute.
3136
The options
3137
are identical to those accepted by the L<< /$mech->xpath >> method.
3138
3139
This is equivalent to calling
3140
3141
$mech->by_id($id,single => 1,%options)
3142
3143
=cut
3144
3145
sub form_id {
3146
0
0
1
my ($self,$name,%options) = @_;
3147
3148
0
_default_limiter( single => \%options );
3149
0
$self->{current_form} = $self->by_id($name,
3150
user_info => "form with id '$name'",
3151
%options
3152
);
3153
};
3154
3155
=head2 C<< $mech->form_number( $number [, %options] ) >>
3156
3157
$mech->form_number( 2 );
3158
3159
Selects the Ith form.
3160
The options
3161
are identical to those accepted by the L<< /$mech->xpath >> method.
3162
3163
=cut
3164
3165
sub form_number {
3166
0
0
1
my ($self,$number,%options) = @_;
3167
3168
0
_default_limiter( single => \%options );
3169
0
$self->{current_form} = $self->xpath("(//form)[$number]",
3170
user_info => "form number $number",
3171
%options
3172
);
3173
};
3174
3175
=head2 C<< $mech->form_with_fields( [$options], @fields ) >>
3176
3177
$mech->form_with_fields(
3178
'user', 'password'
3179
);
3180
3181
Find the form which has the listed fields.
3182
3183
If the first argument is a hash reference, it's taken
3184
as options to C<< ->xpath >>.
3185
3186
See also L<< /$mech->submit_form >>.
3187
3188
=cut
3189
3190
sub form_with_fields {
3191
0
0
1
my ($self,@fields) = @_;
3192
0
my $options = {};
3193
0
0
if (ref $fields[0] eq 'HASH') {
3194
0
$options = shift @fields;
3195
};
3196
0
my @clauses = map { $self->application->element_query([qw[input select textarea]], { 'name' => $_ })} @fields;
0
3197
3198
3199
0
my $q = "//form[" . join( " and ", @clauses)."]";
3200
#warn $q;
3201
0
_default_limiter( single => $options );
3202
0
$self->{current_form} = $self->xpath($q,
3203
user_info => "form with fields [@fields]",
3204
%$options
3205
);
3206
};
3207
3208
=head2 C<< $mech->forms( %options ) >>
3209
3210
my @forms = $mech->forms();
3211
3212
When called in a list context, returns a list
3213
of the forms found in the last fetched page.
3214
In a scalar context, returns a reference to
3215
an array with those forms.
3216
3217
The options
3218
are identical to those accepted by the L<< /$mech->selector >> method.
3219
3220
The returned elements are the DOM C<<
3221
3222
=cut
3223
3224
sub forms {
3225
0
0
1
my ($self, %options) = @_;
3226
0
my @res = $self->selector('form', %options);
3227
return wantarray ? @res
3228
0
0
: \@res
3229
};
3230
3231
=head2 C<< $mech->field( $selector, $value, [,\@pre_events [,\@post_events]] ) >>
3232
3233
$mech->field( user => 'joe' );
3234
$mech->field( not_empty => '', [], [] ); # bypass JS validation
3235
3236
Sets the field with the name given in C<$selector> to the given value.
3237
Returns the value.
3238
3239
The method understands very basic CSS selectors in the value for C<$selector>,
3240
like the L find_input() method.
3241
3242
A selector prefixed with '#' must match the id attribute of the input.
3243
A selector prefixed with '.' matches the class attribute. A selector
3244
prefixed with '^' or with no prefix matches the name attribute.
3245
3246
By passing the array reference C<@pre_events>, you can indicate which
3247
Javascript events you want to be triggered before setting the value.
3248
C<@post_events> contains the events you want to be triggered
3249
after setting the value.
3250
3251
By default, the events set in the
3252
constructor for C and C
3253
are triggered.
3254
3255
=cut
3256
3257
sub field {
3258
0
0
1
my ($self,$name,$value,$pre,$post) = @_;
3259
0
$self->get_set_value(
3260
name => $name,
3261
value => $value,
3262
pre => $pre,
3263
post => $post,
3264
node => $self->current_form,
3265
);
3266
}
3267
3268
=head2 C<< $mech->value( $selector_or_element, [%options] ) >>
3269
3270
print $mech->value( 'user' );
3271
3272
Returns the value of the field given by C<$selector_or_name> or of the
3273
DOM element passed in.
3274
3275
The legacy form of
3276
3277
$mech->value( name => value );
3278
3279
is also still supported but will likely be deprecated
3280
in favour of the C<< ->field >> method.
3281
3282
For fields that can have multiple values, like a C field,
3283
the method is context sensitive and returns the first selected
3284
value in scalar context and all values in list context.
3285
3286
=cut
3287
3288
sub value {
3289
0
0
0
1
if (@_ == 3) {
3290
0
my ($self,$name,$value) = @_;
3291
0
return $self->field($name => $value);
3292
} else {
3293
0
my ($self,$name,%options) = @_;
3294
0
return $self->get_set_value(
3295
node => $self->current_form,
3296
%options,
3297
name => $name,
3298
);
3299
};
3300
};
3301
3302
=head2 C<< $mech->get_set_value( %options ) >>
3303
3304
Allows fine-grained access to getting/setting a value
3305
with a different API. Supported keys are:
3306
3307
pre
3308
post
3309
name
3310
node
3311
value
3312
3313
in addition to all keys that C<< $mech->xpath >> supports.
3314
3315
=cut
3316
3317
sub _field_by_name {
3318
0
0
my ($self,%options) = @_;
3319
0
my @fields;
3320
0
my $name = delete $options{ name };
3321
0
my $attr = 'name';
3322
0
0
if ($name =~ s/^\^//) { # if it starts with ^, it's supposed to be a name
0
0
3323
0
$attr = 'name'
3324
} elsif ($name =~ s/^#//) {
3325
0
$attr = 'id'
3326
} elsif ($name =~ s/^\.//) {
3327
0
$attr = 'class'
3328
};
3329
0
0
if (blessed $name) {
3330
0
@fields = $name;
3331
} else {
3332
0
_default_limiter( single => \%options );
3333
0
my $query = $self->application->element_query([qw[input select textarea]], { $attr => $name });
3334
#warn $query;
3335
0
@fields = $self->xpath($query,%options);
3336
};
3337
@fields
3338
0
}
3339
3340
sub get_set_value {
3341
0
0
1
my ($self,%options) = @_;
3342
0
my $set_value = exists $options{ value };
3343
0
my $value = delete $options{ value };
3344
0
0
my $pre = delete $options{pre} || $self->{pre_value};
3345
0
0
my $post = delete $options{post} || $self->{post_value};
3346
0
my $name = delete $options{ name };
3347
0
my @fields = $self->_field_by_name(
3348
name => $name,
3349
user_info => "input with name '$name'",
3350
%options );
3351
0
0
$pre = [$pre]
3352
if (! ref $pre);
3353
0
0
$post = [$post]
3354
if (! ref $post);
3355
3356
0
0
if ($fields[0]) {
3357
0
my $tag = $fields[0]->{tagName};
3358
0
0
if ($set_value) {
3359
0
for my $ev (@$pre) {
3360
0
$fields[0]->__event($ev);
3361
};
3362
3363
0
0
if ('select' eq $tag) {
3364
0
$self->select($fields[0], $value);
3365
} else {
3366
0
$fields[0]->{value} = $value;
3367
};
3368
3369
0
for my $ev (@$post) {
3370
0
$fields[0]->__event($ev);
3371
};
3372
};
3373
# What about 'checkbox'es/radioboxes?
3374
3375
# Don't bother to fetch the field's value if it's not wanted
3376
0
0
return unless defined wantarray;
3377
3378
# We could save some work here for the simple case of single-select
3379
# dropdowns by not enumerating all options
3380
0
0
if ('SELECT' eq uc $tag) {
3381
0
my @options = $self->xpath('.//option', node => $fields[0] );
3382
0
my @values = map { $_->{value} } grep { $_->{selected} } @options;
0
0
3383
0
0
if (wantarray) {
3384
return @values
3385
0
} else {
3386
0
return $values[0];
3387
}
3388
} else {
3389
0
return $fields[0]->{value}
3390
};
3391
} else {
3392
return
3393
0
}
3394
}
3395
3396
=head2 C<< $mech->select( $name, $value ) >>
3397
3398
=head2 C<< $mech->select( $name, \@values ) >>
3399
3400
Given the name of a C field, set its value to the value
3401
specified. If the field is not C<< >> and the
3402
C<$value> is an array, only the B value will be set.
3403
Passing C<$value> as a hash with
3404
an C key selects an item by number (e.g.
3405
C<< {n => 3} >> or C<< {n => [2,4]} >>).
3406
The numbering starts at 1. This applies to the current form.
3407
3408
If you have a field with C<< >> and you pass a single
3409
C<$value>, then C<$value> will be added to the list of fields selected,
3410
without clearing the others. However, if you pass an array reference,
3411
then all previously selected values will be cleared.
3412
3413
Returns true on successfully setting the value. On failure, returns
3414
false and calls C<< $self>warn() >> with an error message.
3415
3416
=cut
3417
3418
sub select {
3419
0
0
1
my ($self, $name, $value) = @_;
3420
0
my ($field) = $self->_field_by_name(
3421
node => $self->current_form,
3422
name => $name,
3423
#%options,
3424
);
3425
3426
0
0
if (! $field) {
3427
return
3428
0
};
3429
3430
0
my @options = $self->xpath( './/option', node => $field);
3431
0
my @by_index;
3432
my @by_value;
3433
0
my $single = $field->{type} eq "select-one";
3434
0
my $deselect;
3435
3436
0
0
0
if ('HASH' eq ref $value||'') {
0
0
3437
0
for (keys %$value) {
3438
0
0
$self->warn(qq{Unknown select value parameter "$_"})
3439
unless $_ eq 'n';
3440
}
3441
3442
0
$deselect = ref $value->{n};
3443
0
0
@by_index = ref $value->{n} ? @{ $value->{n} } : $value->{n};
0
3444
} elsif ('ARRAY' eq ref $value||'') {
3445
# clear all preselected values
3446
0
$deselect = 1;
3447
0
@by_value = @{ $value };
0
3448
} else {
3449
0
@by_value = $value;
3450
};
3451
3452
0
0
if ($deselect) {
3453
0
for my $o (@options) {
3454
0
$o->{selected} = 0;
3455
}
3456
};
3457
3458
0
0
if ($single) {
3459
# Only use the first element for single-element boxes
3460
0
0
$#by_index = 0+@by_index ? 0 : -1;
3461
0
0
$#by_value = 0+@by_value ? 0 : -1;
3462
};
3463
3464
# Select the items, either by index or by value
3465
0
for my $idx (@by_index) {
3466
0
$options[$idx-1]->{selected} = 1;
3467
};
3468
3469
0
for my $v (@by_value) {
3470
0
my $option = $self->xpath( sprintf( './/option[@value="%s"]', quote_xpath $v) , node => $field, single => 1 );
3471
0
$option->{selected} = 1;
3472
};
3473
3474
0
return @by_index + @by_value > 0;
3475
}
3476
3477
=head2 C<< $mech->tick( $name, $value [, $set ] ) >>
3478
3479
$mech->tick("confirmation_box", 'yes');
3480
3481
"Ticks" the first checkbox that has both the name and value associated with it
3482
on the current form. Dies if there is no named check box for that value.
3483
Passing in a false value as the third optional argument will cause the
3484
checkbox to be unticked.
3485
3486
(Un)ticking the checkbox is done by sending a click event to it if needed.
3487
If C<$value> is C, the first checkbox matching C<$name> will
3488
be (un)ticked.
3489
3490
If C<$name> is a reference to a hash, that hash will be used
3491
as the options to C<< ->find_link_dom >> to find the element.
3492
3493
=cut
3494
3495
sub tick {
3496
0
0
1
my ($self, $name, $value, $set) = @_;
3497
0
0
$set = 1
3498
if (@_ < 4);
3499
0
my %options;
3500
my @boxes;
3501
3502
0
0
0
if (! defined $name) {
0
0
0
3503
0
croak("->tick called with undef name");
3504
} elsif (ref $name and blessed($name) and $name->can('__click')) {
3505
0
$options{ dom } = $name;
3506
} elsif (ref $name eq 'HASH') { # options
3507
0
%options = %$name;
3508
} else {
3509
0
$options{ name } = $name;
3510
};
3511
3512
0
0
if (exists $options{ name }) {
3513
0
my $attr = 'name';
3514
0
0
if ($name =~ s/^\^//) { # if it starts with ^, it's supposed to be a name
0
0
3515
0
$attr = 'name'
3516
} elsif ($name =~ s/^#//) {
3517
0
$attr = 'id'
3518
} elsif ($name =~ s/^\.//) {
3519
0
$attr = 'class'
3520
};
3521
0
$name = quotemeta($name);
3522
0
0
$value = quotemeta($value) if $value;
3523
3524
0
_default_limiter( one => \%options );
3525
$options{ xpath } = [
3526
0
0
defined $value
3527
? sprintf( q{//input[@type="checkbox" and @%s="%s" and @value="%s"]}, $attr, $name, $value)
3528
: sprintf( q{//input[@type="checkbox" and @%s="%s"]}, $attr, $name)
3529
];
3530
0
0
$options{ user_info } = defined $value
3531
? "Checkbox with name '$name' and value '$value'"
3532
: "Checkbox with name '$name'";
3533
};
3534
3535
0
0
if ($options{ dom }) {
3536
0
@boxes = $options{ dom };
3537
} else {
3538
0
@boxes = $self->_option_query(%options);
3539
};
3540
3541
0
my $target = $boxes[0];
3542
0
my $is_set = $self->application->bool_ff_to_perl( $target->{checked} );
3543
0
0
0
if ($set xor $is_set) {
3544
0
0
if ($set) {
3545
0
$target->{checked}= 'checked';
3546
} else {
3547
0
$target->{checked} = 0;
3548
};
3549
};
3550
};
3551
3552
=head2 C<< $mech->untick( $name, $value ) >>
3553
3554
$mech->untick('spam_confirm','yes',undef)
3555
3556
Causes the checkbox to be unticked. Shorthand for
3557
3558
$mech->tick($name,$value,undef)
3559
3560
=cut
3561
3562
sub untick {
3563
0
0
1
my ($self, $name, $value) = @_;
3564
0
$self->tick( $name, $value, undef );
3565
};
3566
3567
=head2 C<< $mech->submit( $form ) >>
3568
3569
$mech->submit;
3570
3571
Submits the form. Note that this does B fire the C
3572
event and thus also does not fire eventual Javascript handlers.
3573
Maybe you want to use C<< $mech->click >> instead.
3574
3575
The default is to submit the current form as returned
3576
by C<< $mech->current_form >>.
3577
3578
=cut
3579
3580
sub submit {
3581
0
0
1
my ($self,$dom_form) = @_;
3582
0
0
$dom_form ||= $self->current_form;
3583
0
0
if ($dom_form) {
3584
0
$dom_form->submit(); # why don't we ->synchronize here??
3585
0
$self->signal_http_status;
3586
3587
0
$self->clear_current_form;
3588
0
1;
3589
} else {
3590
0
croak "I don't know which form to submit, sorry.";
3591
}
3592
};
3593
3594
=head2 C<< $mech->submit_form( %options ) >>
3595
3596
$mech->submit_form(
3597
with_fields => {
3598
user => 'me',
3599
pass => 'secret',
3600
}
3601
);
3602
3603
This method lets you select a form from the previously fetched page,
3604
fill in its fields, and submit it. It combines the form_number/form_name,
3605
set_fields and click methods into one higher level call. Its arguments are
3606
a list of key/value pairs, all of which are optional.
3607
3608
=over 4
3609
3610
=item *
3611
3612
C<< form => $mech->current_form() >>
3613
3614
Specifies the form to be filled and submitted. Defaults to the current form.
3615
3616
=item *
3617
3618
C<< fields => \%fields >>
3619
3620
Specifies the fields to be filled in the current form
3621
3622
=item *
3623
3624
C<< with_fields => \%fields >>
3625
3626
Probably all you need for the common case. It combines a smart form selector
3627
and data setting in one operation. It selects the first form that contains
3628
all fields mentioned in \%fields. This is nice because you don't need to
3629
know the name or number of the form to do this.
3630
3631
(calls L<< /$mech->form_with_fields() >> and L<< /$mech->set_fields() >>).
3632
3633
If you choose this, the form_number, form_name, form_id and fields options
3634
will be ignored.
3635
3636
=back
3637
3638
=cut
3639
3640
sub submit_form {
3641
0
0
1
my ($self,%options) = @_;
3642
3643
0
my $form = delete $options{ form };
3644
0
my $fields;
3645
0
0
if (! $form) {
3646
0
0
if ($fields = delete $options{ with_fields }) {
3647
0
my @names = keys %$fields;
3648
0
$form = $self->form_with_fields( \%options, @names );
3649
0
0
if (! $form) {
3650
0
$self->signal_condition("Couldn't find a matching form for @names.");
3651
return
3652
0
};
3653
} else {
3654
0
0
$fields = delete $options{ fields } || {};
3655
0
$form = $self->current_form;
3656
};
3657
};
3658
3659
0
0
if (! $form) {
3660
0
$self->signal_condition("No form found to submit.");
3661
return
3662
0
};
3663
0
$self->do_set_fields( form => $form, fields => $fields );
3664
0
$self->submit($form);
3665
}
3666
3667
=head2 C<< $mech->set_fields( $name => $value, ... ) >>
3668
3669
$mech->set_fields(
3670
user => 'me',
3671
pass => 'secret',
3672
);
3673
3674
This method sets multiple fields of the current form. It takes a list of
3675
field name and value pairs. If there is more than one field with the same
3676
name, the first one found is set. If you want to select which of the
3677
duplicate field to set, use a value which is an anonymous array which
3678
has the field value and its number as the 2 elements.
3679
3680
=cut
3681
3682
sub set_fields {
3683
0
0
1
my ($self, %fields) = @_;
3684
0
my $f = $self->current_form;
3685
0
0
if (! $f) {
3686
0
croak "Can't set fields: No current form set.";
3687
};
3688
0
$self->do_set_fields(form => $f, fields => \%fields);
3689
};
3690
3691
sub do_set_fields {
3692
0
0
0
my ($self, %options) = @_;
3693
0
my $form = delete $options{ form };
3694
0
my $fields = delete $options{ fields };
3695
3696
0
while (my($n,$v) = each %$fields) {
3697
0
0
if (ref $v) {
3698
0
($v,my $num) = @$v;
3699
0
0
warn "Index larger than 1 not supported, ignoring"
3700
unless $num == 1;
3701
};
3702
3703
0
$self->get_set_value( node => $form, name => $n, value => $v, %options );
3704
}
3705
};
3706
3707
=head2 C<< $mech->set_visible( @values ) >>
3708
3709
$mech->set_visible( $username, $password );
3710
3711
This method sets fields of the current form without having to know their
3712
names. So if you have a login screen that wants a username and password,
3713
you do not have to fetch the form and inspect the source (or use the
3714
C utility, installed with L) to see what
3715
the field names are; you can just say
3716
3717
$mech->set_visible( $username, $password );
3718
3719
and the first and second fields will be set accordingly. The method is
3720
called set_visible because it acts only on visible fields;
3721
hidden form inputs are not considered. It also respects
3722
the respective return value of C<< ->is_visible() >> for each
3723
field, so hiding of fields through CSS affects this too.
3724
3725
The specifiers that are possible in L are not yet supported.
3726
3727
=cut
3728
3729
sub set_visible {
3730
0
0
1
my ($self,@values) = @_;
3731
0
my $form = $self->current_form;
3732
0
my @form;
3733
0
0
if ($form) { @form = (node => $form) };
0
3734
0
my @visible_fields = $self->xpath( q{//input[not(@type) or }
3735
. q{(@type!= "hidden" and }
3736
. q{ @type!= "button" and }
3737
. q{ @type!= "submit" and }
3738
. q{ @type!= "image")]},
3739
@form
3740
);
3741
3742
0
@visible_fields = grep { $self->is_visible( $_ ) } @visible_fields;
0
3743
3744
0
0
if (@values > @visible_fields) {
3745
0
$self->signal_condition( "Not enough fields on page" );
3746
} else {
3747
0
for my $idx (0..$#values) {
3748
0
$self->field( $visible_fields[ $idx ] => $values[ $idx ]);
3749
};
3750
}
3751
}
3752
3753
=head2 C<< $mech->is_visible( $element ) >>
3754
3755
=head2 C<< $mech->is_visible( %options ) >>
3756
3757
if ($mech->is_visible( selector => '#login' )) {
3758
print "You can log in now.";
3759
};
3760
3761
Returns true if the element is visible, that is, it is
3762
a member of the DOM and neither it nor its ancestors have
3763
a CSS C attribute of C or
3764
a C attribute of C.
3765
3766
You can either pass in a DOM element or a set of key/value
3767
pairs to search the document for the element you want.
3768
3769
=over 4
3770
3771
=item *
3772
3773
C - the XPath query
3774
3775
=item *
3776
3777
C - the CSS selector
3778
3779
=item *
3780
3781
C - a DOM node
3782
3783
=back
3784
3785
The remaining options are passed through to either the
3786
L<< /$mech->xpath|xpath >> or L<< /$mech->selector|selector >> method.
3787
3788
=cut
3789
3790
sub is_visible {
3791
0
0
1
my ($self,%options);
3792
0
0
if (2 == @_) {
3793
0
($self,$options{dom}) = @_;
3794
} else {
3795
0
($self,%options) = @_;
3796
};
3797
0
_default_limiter( 'maybe', \%options );
3798
0
0
if (! $options{dom}) {
3799
0
$options{dom} = $self->_option_query(%options);
3800
};
3801
# No element means not visible
3802
return
3803
0
0
unless $options{ dom };
3804
0
0
$options{ window } ||= $self->tab->{linkedBrowser}->{contentWindow};
3805
3806
0
my $_is_visible = $self->repl->declare(<<'JS');
3807
function (obj,window)
3808
{
3809
while (obj) {
3810
// No object
3811
if (!obj) return false;
3812
3813
try {
3814
if( obj["parentNode"] ) 1;
3815
} catch (e) {
3816
// Dead object
3817
return false
3818
};
3819
// Descends from document, so we're done
3820
if (obj.parentNode === obj.ownerDocument) {
3821
return true;
3822
};
3823
// Not in the DOM
3824
if (!obj.parentNode) {
3825
return false;
3826
};
3827
// Direct style check
3828
if (obj.style) {
3829
if (obj.style.display == 'none') return false;
3830
if (obj.style.visibility == 'hidden') return false;
3831
};
3832
3833
if (window.getComputedStyle) {
3834
var style = window.getComputedStyle(obj, null);
3835
if (style.display == 'none') {
3836
return false; }
3837
if (style.visibility == 'hidden') {
3838
return false;
3839
};
3840
};
3841
obj = obj.parentNode;
3842
};
3843
// The object does not live in the DOM at all
3844
return false
3845
}
3846
JS
3847
0
!!$_is_visible->($options{dom}, $options{window});
3848
};
3849
3850
=head2 C<< $mech->wait_until_invisible( $element ) >>
3851
3852
=head2 C<< $mech->wait_until_invisible( %options ) >>
3853
3854
$mech->wait_until_invisible( $please_wait );
3855
3856
Waits until an element is not visible anymore.
3857
3858
Takes the same options as L<< $mech->is_visible/->is_visible >>.
3859
3860
In addition, the following options are accepted:
3861
3862
=over 4
3863
3864
=item *
3865
3866
C - the timeout after which the function will C. To catch
3867
the condition and handle it in your calling program, use an L block.
3868
A timeout of C<0> means to never time out.
3869
3870
=item *
3871
3872
C - the interval in seconds used to L. Subsecond
3873
intervals are possible.
3874
3875
=back
3876
3877
Note that when passing in a selector, that selector is requeried
3878
on every poll instance. So the following query will work as expected:
3879
3880
xpath => '//*[contains(text(),"stand by")]'
3881
3882
This also means that if your selector query relies on finding
3883
a changing text, you need to pass the node explicitly instead of
3884
passing the selector.
3885
3886
=cut
3887
3888
sub wait_until_invisible {
3889
0
0
1
my ($self,%options);
3890
0
0
if (2 == @_) {
3891
0
($self,$options{dom}) = @_;
3892
} else {
3893
0
($self,%options) = @_;
3894
};
3895
0
0
my $sleep = delete $options{ sleep } || 0.3;
3896
0
0
my $timeout = delete $options{ timeout } || 0;
3897
3898
0
_default_limiter( 'maybe', \%options );
3899
3900
3901
0
my $timeout_after;
3902
0
0
if ($timeout) {
3903
0
$timeout_after = time + $timeout;
3904
};
3905
0
my $v;
3906
my $node;
3907
0
0
do {
0
3908
0
$node = $options{ dom };
3909
0
0
if (! $node) {
3910
0
$node = $self->_option_query(%options);
3911
};
3912
return
3913
0
0
unless $node;
3914
0
sleep $sleep;
3915
} while ( $v = $self->is_visible($node)
3916
and (!$timeout_after or time < $timeout_after ));
3917
0
0
0
if ($node and time >= $timeout_after) {
3918
0
croak "Timeout of $timeout seconds reached while waiting for element to become invisible";
3919
};
3920
};
3921
3922
# Internal method to run either an XPath, CSS or id query against the DOM
3923
# Returns the element(s) found
3924
my %rename = (
3925
xpath => 'xpath',
3926
selector => 'selector',
3927
id => 'by_id',
3928
by_id => 'by_id',
3929
);
3930
3931
sub _option_query {
3932
0
0
my ($self,%options) = @_;
3933
0
my ($method,$q);
3934
0
for my $meth (keys %rename) {
3935
0
0
if (exists $options{ $meth }) {
3936
0
$q = delete $options{ $meth };
3937
0
0
$method = $rename{ $meth } || $meth;
3938
}
3939
};
3940
0
_default_limiter( 'one' => \%options );
3941
0
0
croak "Need either a name, a selector or an xpath key!"
3942
if not $method;
3943
0
return $self->$method( $q, %options );
3944
};
3945
3946
=head2 C<< $mech->clickables() >>
3947
3948
print "You could click on\n";
3949
for my $el ($mech->clickables) {
3950
print $el->{innerHTML}, "\n";
3951
};
3952
3953
Returns all clickable elements, that is, all elements
3954
with an C attribute.
3955
3956
=cut
3957
3958
sub clickables {
3959
0
0
1
my ($self, %options) = @_;
3960
0
$self->xpath('//*[@onclick]', %options);
3961
};
3962
3963
=head2 C<< $mech->expand_frames( $spec ) >>
3964
3965
my @frames = $mech->expand_frames();
3966
3967
Expands the frame selectors (or C<1> to match all frames)
3968
into their respective DOM document nodes according to the current
3969
document. All frames will be visited in breadth first order.
3970
3971
This is mostly an internal method.
3972
3973
=cut
3974
3975
sub expand_frames {
3976
0
0
1
my ($self, $spec, $document) = @_;
3977
0
0
$spec ||= $self->{frames};
3978
0
0
my @spec = ref $spec ? @$spec : $spec;
3979
0
0
$document ||= $self->document;
3980
3981
0
0
0
if (! ref $spec and $spec !~ /\D/ and $spec == 1) {
0
3982
# All frames
3983
0
@spec = qw( frame iframe );
3984
};
3985
3986
# Optimize the default case of only names in @spec
3987
0
my @res;
3988
0
0
if (! grep {ref} @spec) {
0
3989
0
@res = map { $_->{contentDocument} }
0
3990
$self->selector(
3991
\@spec,
3992
document => $document,
3993
frames => 0, # otherwise we'll recurse :)
3994
);
3995
} else {
3996
0
0
@res =
3997
map { #warn "Expanding $_";
3998
0
ref $_
3999
? $_
4000
# Just recurse into the above code path
4001
: $self->expand_frames( $_, $document );
4002
} @spec;
4003
}
4004
};
4005
4006
=head1 IMAGE METHODS
4007
4008
=head2 C<< $mech->content_as_png( [$tab, \%coordinates, \%target_size ] ) >>
4009
4010
my $png_data = $mech->content_as_png();
4011
4012
# Create scaled-down 480px wide preview
4013
my $png_data = $mech->content_as_png(undef, undef, { width => 480 });
4014
4015
Returns the given tab or the current page rendered as PNG image.
4016
4017
All parameters are optional.
4018
4019
=over 4
4020
4021
=item *
4022
4023
C<$tab> defaults to the current tab.
4024
4025
=item *
4026
4027
If the coordinates are given, that rectangle will be cut out.
4028
The coordinates should be a hash with the four usual entries,
4029
C,C,C,C.
4030
4031
=item *
4032
4033
The target size of the image can also be given. It defaults to
4034
the size of the image. The allowed parameters in the hash are
4035
4036
C, C - for specifying the scale, default is 1.0 in each direction.
4037
4038
C, C - for specifying the target size
4039
4040
If you want the resulting image to be 480 pixels wide, specify
4041
4042
{ width => 480 }
4043
4044
The height will then be calculated from the ratio of original width to
4045
original height.
4046
4047
=back
4048
4049
This method is specific to WWW::Mechanize::Firefox.
4050
4051
Currently, the data transfer between Firefox and Perl
4052
is done Base64-encoded. It would be beneficial to find what's
4053
necessary to make JSON handle binary data more gracefully.
4054
4055
=cut
4056
4057
sub content_as_png {
4058
0
0
1
my ($self, $tab, $rect, $target_rect) = @_;
4059
0
0
$tab ||= $self->tab;
4060
0
0
$rect ||= {};
4061
0
0
$target_rect ||= {};
4062
4063
# Mostly taken from
4064
# http://wiki.github.com/bard/mozrepl/interactor-screenshot-server
4065
# Except for the addition of a target image size
4066
0
my $screenshot = $self->repl->declare(<<'JS');
4067
function (tab,rect,target_rect) {
4068
var browser = tab.linkedBrowser;
4069
var browserWindow = Components.classes['@mozilla.org/appshell/window-mediator;1']
4070
.getService(Components.interfaces.nsIWindowMediator)
4071
.getMostRecentWindow('navigator:browser');
4072
var win = browser.contentWindow;
4073
var body = win.document.body;
4074
if(!body) {
4075
return;
4076
};
4077
var canvas = browserWindow
4078
.document
4079
.createElementNS('http://www.w3.org/1999/xhtml', 'canvas');
4080
var left = rect.left || 0;
4081
var top = rect.top || 0;
4082
var width = rect.width || body.clientWidth;
4083
var height = rect.height || body.clientHeight;
4084
4085
if( isNaN( target_rect.scalex * target_rect.scaley ) || target_rect.scalex * target_rect.scaley == 0) {
4086
// No scale was given
4087
// Was a fixed target width / height given?
4088
if( target_rect.width ) {
4089
target_rect.scalex = target_rect.width / width;
4090
};
4091
if( target_rect.height ) {
4092
target_rect.scaley = target_rect.height / height
4093
};
4094
4095
// If only one of scalex / scaley is given, force the other
4096
// to be the same, default to 1.0
4097
target_rect.scalex = target_rect.scalex || target_rect.scaley || (target_rect.width / width) || 1.0;
4098
target_rect.scaley = target_rect.scaley || target_rect.scalex || (target_rect.height / height) || 1.0;
4099
} else {
4100
//alert("scales fixed");
4101
};
4102
// Calculate the target width/height if missing:
4103
target_rect.height = target_rect.height || height * target_rect.scaley;
4104
target_rect.width = target_rect.width || width * target_rect.scalex;
4105
4106
canvas.width = target_rect.width;
4107
canvas.height = target_rect.height;
4108
var ctx = canvas.getContext('2d');
4109
ctx.clearRect(0, 0, target_rect.width, target_rect.height);
4110
ctx.save();
4111
ctx.scale(target_rect.scalex, target_rect.scaley);
4112
ctx.drawWindow(win, left, top, width, height, 'rgb(255,255,255)');
4113
ctx.restore();
4114
4115
//return atob(
4116
return canvas
4117
.toDataURL('image/png', '')
4118
.split(',')[1]
4119
// );
4120
}
4121
JS
4122
0
my $scr = $screenshot->($tab, $rect, $target_rect);
4123
0
0
return $scr ? decode_base64($scr) : undef
4124
};
4125
4126
=head2 C<< $mech->element_as_png( $element ) >>
4127
4128
my $shiny = $mech->selector('#shiny', single => 1);
4129
my $i_want_this = $mech->element_as_png($shiny);
4130
4131
Returns PNG image data for a single element
4132
4133
=cut
4134
4135
sub element_as_png {
4136
0
0
1
my ($self, $element) = @_;
4137
0
my $tab = $self->tab;
4138
4139
0
my $pos = $self->element_coordinates($element);
4140
0
return $self->content_as_png($tab, $pos);
4141
};
4142
4143
=head2 C<< $mech->element_coordinates( $element ) >>
4144
4145
my $shiny = $mech->selector('#shiny', single => 1);
4146
my ($pos) = $mech->element_coordinates($shiny);
4147
print $pos->{left},',', $pos->{top};
4148
4149
Returns the page-coordinates of the C<$element>
4150
in pixels as a hash with four entries, C, C, C and C.
4151
4152
This function might get moved into another module more geared
4153
towards rendering HTML.
4154
4155
=cut
4156
4157
sub element_coordinates {
4158
0
0
1
my ($self, $element) = @_;
4159
4160
# Mostly taken from
4161
# http://www.quirksmode.org/js/findpos.html
4162
0
my $findPos = $self->repl->declare(<<'JS');
4163
function (obj) {
4164
var res = {
4165
left: 0,
4166
top: 0,
4167
width: obj.scrollWidth,
4168
height: obj.scrollHeight
4169
};
4170
if (obj.offsetParent) {
4171
do {
4172
res.left += obj.offsetLeft;
4173
res.top += obj.offsetTop;
4174
} while (obj = obj.offsetParent);
4175
}
4176
return res;
4177
}
4178
JS
4179
0
$findPos->($element);
4180
};
4181
4182
1;
4183
4184
__END__