line
stmt
bran
cond
sub
pod
time
code
1
package WWW::Mechanize::Firefox;
2
80
80
947979
use 5.006; #weaken
80
183
3
80
80
267
use strict;
80
80
80
1521
4
80
80
35133
use Time::HiRes qw(sleep); # hires sleep()
80
75717
80
280
5
6
80
80
40583
use URI ();
80
368793
80
1477
7
80
80
353
use File::Basename qw(dirname);
80
135
80
4809
8
80
80
33981
use HTTP::Response ();
80
1135178
80
1593
9
80
80
35152
use HTML::Selector::XPath 'selector_to_xpath';
80
147243
80
4167
10
80
80
31015
use MIME::Base64 'decode_base64';
80
34687
80
3874
11
80
80
30996
use WWW::Mechanize::Link;
80
22470
80
1868
12
80
80
29464
use Firefox::Application;
80
143
80
1790
13
80
80
336
use MozRepl::RemoteObject ();
80
83
80
825
14
80
80
225
use MozRepl::RemoteObject::Methods ();
80
84
80
865
15
80
80
27991
use HTTP::Cookies::MozRepl ();
80
138
80
1169
16
80
80
33875
use HTTP::Request::Common ();
80
203259
80
1561
17
80
80
351
use Scalar::Util qw'blessed weaken';
80
76
80
3594
18
80
80
282
use Encode qw(encode decode);
80
85
80
3092
19
80
80
274
use Carp qw(carp croak );
80
86
80
2930
20
21
80
80
261
use vars qw'$VERSION %link_spec @CARP_NOT';
80
86
80
48388
22
$VERSION = '0.79';
23
@CARP_NOT = ('MozRepl::RemoteObject',
24
'MozRepl::AnyEvent',
25
'MozRepl::RemoteObject::Instance'
26
); # we trust these blindly
27
28
=head1 NAME
29
30
WWW::Mechanize::Firefox - use Firefox as if it were WWW::Mechanize
31
32
=head1 SYNOPSIS
33
34
use WWW::Mechanize::Firefox;
35
my $mech = WWW::Mechanize::Firefox->new();
36
$mech->get('http://google.com');
37
38
$mech->eval_in_page('alert("Hello Firefox")');
39
my $png = $mech->content_as_png();
40
41
This module will let you automate Firefox through the
42
Mozrepl plugin. You need to have installed
43
that plugin in your Firefox.
44
45
For more examples see L.
46
47
=head1 CONSTRUCTOR and CONFIGURATION
48
49
=head2 C<< $mech->new( %args ) >>
50
51
use WWW::Mechanize::Firefox;
52
my $mech = WWW::Mechanize::Firefox->new();
53
54
Creates a new instance and connects it to Firefox.
55
56
Note that Firefox must have the C
57
extension installed and enabled.
58
59
The following options are recognized:
60
61
=over 4
62
63
=item *
64
65
C - regex for the title of the tab to reuse. If no matching tab is
66
found, the constructor dies.
67
68
If you pass in the string C, the currently
69
active tab will be used instead.
70
71
If you pass in a L instance, this will be used
72
as the new tab. This is convenient if you have an existing tab
73
in Firefox as object already, for example created through
74
LC<< ->addTab() >>.
75
76
=item *
77
78
C - will create a new tab if no existing tab matching
79
the criteria given in C can be found.
80
81
=item *
82
83
C - make the tab the active tab
84
85
=item *
86
87
C - name of the program to launch if we can't connect to it on
88
the first try.
89
90
=item *
91
92
C - an array reference of ids of subframes to include when
93
searching for elements on a page.
94
95
If you want to always search through all frames, just pass C<1>. This
96
is the default.
97
98
To prevent searching through frames, pass
99
100
frames => 0
101
102
To whitelist frames to be searched, pass the list
103
of frame selectors:
104
105
frames => ['#content_frame']
106
107
=item *
108
109
C - whether web failures converted are fatal Perl errors. See
110
the C accessor. True by default to make error checking easier.
111
112
To make errors non-fatal, pass
113
114
autodie => 0
115
116
in the constructor.
117
118
=item *
119
120
C - the name of the User Agent to use. This overrides
121
how Firefox identifies itself.
122
123
=item *
124
125
C - array reference to log levels, passed through to L
126
127
=item *
128
129
C - L buffer size, if the default of 1MB is not enough
130
131
=item *
132
133
C - the set of default Javascript events to listen for while
134
waiting for a reply. In fact, WWW::Mechanize::Firefox will almost always
135
wait until a 'DOMContentLoaded' or 'load' event. 'pagehide' events
136
will tell it for what frames to wait.
137
138
The default set is
139
140
'DOMContentLoaded','load',
141
'pageshow',
142
'pagehide',
143
'error','abort','stop',
144
145
=item *
146
147
C - a premade L
148
149
=item *
150
151
C - a premade L instance or a connection string
152
suitable for initializing one
153
154
=item *
155
156
C - whether to use the command queueing of L.
157
Default is 1.
158
159
=item *
160
161
C - whether to use native JSON encoder of Firefox
162
163
js_JSON => 'native', # force using the native JSON encoder
164
165
The default is to autodetect whether a native JSON encoder is available and
166
whether the transport is UTF-8 safe.
167
168
=item *
169
170
C - the events that are sent to an input field before its
171
value is changed. By default this is C<[focus]>.
172
173
=item *
174
175
C - the events that are sent to an input field after its
176
value is changed. By default this is C<[blur, change]>.
177
178
=back
179
180
=cut
181
182
sub new {
183
60
60
1
85967
my ($class, %args) = @_;
184
185
60
50
210
if (! ref $args{ app }) {
186
60
181
my @passthrough = qw(launch repl bufsize log use_queue js_JSON);
187
60
100
112
my %options = map { exists $args{ $_ } ? ($_ => delete $args{ $_ }) : () }
360
553
188
@passthrough;
189
60
353
$args{ app } = Firefox::Application->new(
190
%options
191
);
192
};
193
194
0
0
if (my $tabname = delete $args{ tab }) {
195
0
0
if (! ref $tabname) {
0
196
0
0
if ($tabname eq 'current') {
197
0
$args{ tab } = $args{ app }->selectedTab();
198
} else {
199
0
croak "Don't know what to do with tab '$tabname'. Did you mean qr{$tabname}?";
200
};
201
} elsif ('MozRepl::RemoteObject::Instance' eq ref $tabname) {
202
# Nothing to do - we already got a tab passed in
203
# Just put it back in place
204
0
$args{ tab } = $tabname;
205
} else {
206
0
($args{ tab }) = grep { $_->{title} =~ /$tabname/ }
207
0
$args{ app }->openTabs();
208
0
0
if (! $args{ tab }) {
209
0
0
if (! delete $args{ create }) {
210
0
croak "Couldn't find a tab matching /$tabname/";
211
} else {
212
# fall through into tab creation
213
};
214
} else {
215
0
$args{ tab } = $args{ tab }->{tab};
216
};
217
};
218
};
219
0
0
if (! $args{ tab }) {
220
0
0
my @autoclose = exists $args{ autoclose } ? (autoclose => $args{ autoclose }) : ();
221
0
$args{ tab } = $args{ app }->addTab( @autoclose );
222
0
my $body = $args{ tab }->MozRepl::RemoteObject::Methods::dive(qw[ linkedBrowser contentWindow document body ]);
223
0
$body->{innerHTML} = __PACKAGE__;
224
};
225
226
0
0
if (delete $args{ autoclose }) {
227
0
$args{ app }->autoclose_tab($args{ tab });
228
};
229
0
0
if (! exists $args{ autodie }) { $args{ autodie } = 1 };
0
230
231
$args{ events } ||= [
232
0
0
'DOMContentLoaded','load',
233
'pageshow', # Navigation from cache will use "pageshow"
234
#'pagehide',
235
'error','abort','stop',
236
];
237
0
0
$args{ on_event } ||= undef;
238
0
0
$args{ pre_value } ||= ['focus'];
239
0
0
$args{ post_value } ||= ['change','blur'];
240
0
0
if( ! exists $args{ frames }) {
241
0
0
$args{ frames } ||= 1; # we default to searching frames
242
};
243
244
die "No tab found"
245
0
0
unless $args{tab};
246
247
0
0
if (delete $args{ activate }) {
248
0
$args{ app }->activateTab( $args{ tab });
249
};
250
251
0
0
$args{ response } ||= undef;
252
0
0
$args{ current_form } ||= undef;
253
254
0
0
$args{ event_log } ||= [];
255
256
0
my $agent = delete $args{ agent };
257
258
0
my $self= bless \%args, $class;
259
260
0
$self->_initXpathResultTypes;
261
262
0
0
if( defined $agent ) {
263
0
$self->agent( $agent );
264
};
265
266
0
$self
267
};
268
269
sub DESTROY {
270
0
0
my ($self) = @_;
271
0
local $@;
272
0
0
if (my $app = delete $self->{ app }) {
273
0
%$self = (); # wipe out all references we keep
274
# but keep $app alive until we can dispose of it
275
# as the last thing, now:
276
0
$app = undef;
277
};
278
}
279
280
=head2 C<< $mech->agent( $product_id ); >>
281
282
$mech->agent('wonderbot/JS 1.0');
283
284
Set the product token that is used to identify the user agent on the network.
285
The agent value is sent as the "User-Agent" header in the requests. The default
286
is whatever Firefox uses.
287
288
To reset the user agent to the Firefox default, pass an empty string:
289
290
$mech->agent('');
291
292
=cut
293
294
sub agent {
295
0
0
1
my ($self,$name) = @_;
296
0
0
if( defined $name ) {
0
297
0
$self->add_header('User-Agent',$name);
298
} elsif( $name eq '' ) {
299
0
$self->delete_header('User-Agent');
300
};
301
};
302
303
=head2 C<< $mech->autodie( [$state] ) >>
304
305
$mech->autodie(0);
306
307
Accessor to get/set whether warnings become fatal.
308
309
=cut
310
311
0
0
0
1
sub autodie { $_[0]->{autodie} = $_[1] if @_ == 2; $_[0]->{autodie} }
0
312
313
=head2 C<< $mech->events() >>
314
315
$mech->events( ['load'] );
316
317
Sets or gets the set of Javascript events that WWW::Mechanize::Firefox
318
will wait for after requesting a new page. Returns an array reference.
319
320
Changing the set of events will most likely make WWW::Mechanize::Firefox
321
stall while waiting for a response.
322
323
This method is special to WWW::Mechanize::Firefox.
324
325
=cut
326
327
0
0
0
1
sub events { $_[0]->{events} = $_[1] if (@_ > 1); $_[0]->{events} };
0
328
329
=head2 C<< $mech->on_event() >>
330
331
$mech->on_event(1); # prints every page load event
332
333
# or give it a callback
334
$mech->on_event(sub { warn "Page loaded with $ev->{name} event" });
335
336
Gets/sets the notification handler for the Javascript event
337
that finished a page load. Set it to C<1> to output via C,
338
or a code reference to call it with the event.
339
340
This method is special to WWW::Mechanize::Firefox.
341
342
=cut
343
344
0
0
0
1
sub on_event { $_[0]->{on_event} = $_[1] if (@_ > 1); $_[0]->{on_event} };
0
345
346
=head2 C<< $mech->cookies() >>
347
348
my $cookie_jar = $mech->cookies();
349
350
Returns a L object that was initialized
351
from the live Firefox instance.
352
353
B C<< ->set_cookie >> is not yet implemented,
354
as is saving the cookie jar.
355
356
=cut
357
358
sub cookies {
359
0
0
1
return HTTP::Cookies::MozRepl->new(
360
repl => $_[0]->repl
361
)
362
}
363
364
=head1 JAVASCRIPT METHODS
365
366
=head2 C<< $mech->allow( %options ) >>
367
368
Enables or disables browser features for the current tab.
369
The following options are recognized:
370
371
=over 4
372
373
=item *
374
375
C - Whether to allow plugin execution.
376
377
=item *
378
379
C - Whether to allow Javascript execution.
380
381
=item *
382
383
C - Attribute stating if refresh based redirects can be allowed.
384
385
=item *
386
387
C, C - Attribute stating if it should allow subframes (framesets/iframes) or not.
388
389
=item *
390
391
C - Attribute stating whether or not images should be loaded.
392
393
=back
394
395
Options not listed remain unchanged.
396
397
=head3 Disable Javascript
398
399
$mech->allow( javascript => 0 );
400
401
=cut
402
403
80
80
1395
use vars '%known_options';
80
107
80
275620
404
%known_options = (
405
'javascript' => 'allowJavascript',
406
'plugins' => 'allowPlugins',
407
'metaredirects' => 'allowMetaRedirects',
408
'subframes' => 'allowSubframes',
409
'frames' => 'allowSubframes',
410
'images' => 'allowImages',
411
);
412
413
sub allow {
414
0
0
1
my ($self,%options) = @_;
415
0
my $shell = $self->docshell;
416
0
for my $opt (sort keys %options) {
417
0
0
if (my $opt_js = $known_options{ $opt }) {
418
0
$shell->{$opt_js} = $options{ $opt };
419
} else {
420
0
carp "Unknown option '$opt_js' (ignored)";
421
};
422
};
423
};
424
425
=head2 C<< $mech->js_errors() >>
426
427
print $_->{message}
428
for $mech->js_errors();
429
430
An interface to the Javascript Error Console
431
432
Returns the list of errors in the JEC
433
434
Maybe this should be called C or
435
C instead.
436
437
=cut
438
439
sub js_console {
440
0
0
0
my ($self) = @_;
441
0
my $getConsoleService = $self->repl->declare(<<'JS');
442
function() {
443
return Components.classes["@mozilla.org/consoleservice;1"]
444
.getService(Components.interfaces.nsIConsoleService);
445
}
446
JS
447
0
$getConsoleService->()
448
}
449
450
sub js_errors {
451
0
0
1
my ($self,$page) = @_;
452
0
my $console = $self->js_console;
453
0
my $getErrorMessages = $self->repl->declare(<<'JS', 'list');
454
function (consoleService) {
455
var out = {};
456
consoleService.getMessageArray(out, {});
457
return out.value || []
458
};
459
JS
460
0
$getErrorMessages->($console);
461
}
462
463
=head2 C<< $mech->clear_js_errors() >>
464
465
$mech->clear_js_errors();
466
467
Clears all Javascript messages from the console
468
469
=cut
470
471
sub clear_js_errors {
472
0
0
1
my ($self,$page) = @_;
473
0
$self->js_console->reset;
474
475
};
476
477
=head2 C<< $mech->eval_in_page( $str [, $env [, $document]] ) >>
478
479
=head2 C<< $mech->eval( $str [, $env [, $document]] ) >>
480
481
my ($value, $type) = $mech->eval( '2+2' );
482
483
Evaluates the given Javascript fragment in the
484
context of the web page.
485
Returns a pair of value and Javascript type.
486
487
This allows access to variables and functions declared
488
"globally" on the web page.
489
490
The returned result needs to be treated with
491
extreme care because
492
it might lead to Javascript execution in the context of
493
your application instead of the context of the webpage.
494
This should be evident for functions and complex data
495
structures like objects. When working with results from
496
untrusted sources, you can only safely use simple
497
types like C.
498
499
If you want to modify the environment the code is run under,
500
pass in a hash reference as the second parameter. All keys
501
will be inserted into the C object as well as
502
C. Also, complex data structures are only
503
supported if they contain no objects.
504
If you need finer control, you'll have to
505
write the Javascript yourself.
506
507
This method is special to WWW::Mechanize::Firefox.
508
509
Also, using this method opens a potential B as
510
the returned values can be objects and using these objects
511
can execute malicious code in the context of the Firefox application.
512
513
=cut
514
515
sub eval_in_page {
516
0
0
1
my ($self,$str,$env,$doc,$window) = @_;
517
0
0
$env ||= {};
518
0
my $js_env = {};
519
0
0
$doc ||= $self->document;
520
521
# do a manual transfer of keys, to circumvent our stupid
522
# transformation routine:
523
0
0
if (keys %$env) {
524
0
$js_env = $self->repl->declare(<<'JS')->();
525
function () { return new Object }
526
JS
527
0
for my $k (keys %$env) {
528
0
$js_env->{$k} = $env->{$k};
529
};
530
};
531
532
0
my $eval_in_sandbox = $self->repl->declare(<<'JS', 'list');
533
function (w,d,str,env,caller,line) {
534
var unsafeWin = w.wrappedJSObject;
535
var safeWin = XPCNativeWrapper(unsafeWin);
536
var sandbox = Components.utils.Sandbox(safeWin);
537
sandbox.window = safeWin;
538
sandbox.document = d;
539
// Transfer the environment
540
for (var e in env) {
541
sandbox[e] = env[e]
542
sandbox.window[e] = env[e]
543
}
544
sandbox.__proto__ = unsafeWin;
545
546
var res = Components.utils.evalInSandbox(str, sandbox, "1.8",caller,line);
547
return [res,typeof(res)];
548
};
549
JS
550
0
0
$window ||= $self->tab->{linkedBrowser}->{contentWindow};
551
# Report errors from scope of caller
552
# This feels weirdly backwards here, but oh well:
553
#local @CARP_NOT = (ref $self->repl); # we trust this
554
555
0
my ($caller,$line) = (caller)[1,2];
556
557
0
$eval_in_sandbox->($window,$doc,$str,$js_env,$caller,$line);
558
};
559
*eval = \&eval_in_page;
560
561
=head2 C<< $mech->unsafe_page_property_access( ELEMENT ) >>
562
563
Allows you unsafe access to properties of the current page. Using
564
such properties is an incredibly bad idea.
565
566
This is why the function Cs. If you really want to use
567
this function, edit the source code.
568
569
=cut
570
571
sub unsafe_page_property_access {
572
0
0
1
my ($mech,$element) = @_;
573
0
die;
574
0
my $window = $mech->tab->{linkedBrowser}->{contentWindow};
575
0
my $unsafe = $window->{wrappedJSObject};
576
0
$unsafe->{$element}
577
};
578
579
=head1 UI METHODS
580
581
See also L for how to add more than one tab
582
and how to manipulate windows and tabs.
583
584
=head2 C<< $mech->application() >>
585
586
my $ff = $mech->application();
587
588
Returns the L object for manipulating
589
more parts of the Firefox UI and application.
590
591
=cut
592
593
0
0
1
sub application { $_[0]->{app} };
594
595
=head2 C<< $mech->autoclose_tab >>
596
597
$mech->autoclose_tab( 0 ); # keep tab open after program end
598
599
Set whether to close the tab associated with the instance.
600
601
=cut
602
603
sub autoclose_tab {
604
0
0
1
my $self = shift;
605
0
$self->application->autoclose_tab($self->tab, @_);
606
};
607
608
=head2 C<< $mech->tab() >>
609
610
Gets the object that represents the Firefox tab used by WWW::Mechanize::Firefox.
611
612
This method is special to WWW::Mechanize::Firefox.
613
614
=cut
615
616
0
0
1
sub tab { $_[0]->{tab} };
617
618
=head2 C<< $mech->make_progress_listener( %callbacks ) >>
619
620
my $eventlistener = $mech->progress_listener(
621
onStateChange => \&onStateChange,
622
);
623
624
Creates an unconnected C<< nsIWebProgressListener >> interface
625
which calls the Perl subroutines you pass in.
626
627
Returns a handle. Once the handle gets released, all callbacks will
628
get stopped. Also, all Perl callbacks will get deregistered from the
629
Javascript bridge, so make sure not to use the same callback
630
in different progress listeners at the same time.
631
The sender may still call your callbacks.
632
633
=cut
634
635
sub make_progress_listener {
636
0
0
1
my ($mech,%handlers) = @_;
637
0
my $NOTIFY_STATE = $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
638
+ $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS')
639
;
640
0
my ($obj) = $mech->repl->expr('new Object');
641
0
for my $key (keys %handlers) {
642
0
$obj->{$key} = $handlers{$key};
643
};
644
#warn "Listener created";
645
646
0
my $mk_nsIWebProgressListener = $mech->repl->declare(<<'JS');
647
function (myListener) {
648
var callbacks = ["onStateChange",
649
"onLocationChange",
650
"onProgressChange",
651
"onStatusChange",
652
"onSecurityChange"
653
// ,"onProgressChange64"
654
// ,"onRefreshAttempted"
655
];
656
for (var h in callbacks) {
657
var e = callbacks[h];
658
if (! myListener[e]) {
659
myListener[e] = function(){}
660
} else {
661
// alert("Setting callback for " + e);
662
};
663
};
664
myListener.QueryInterface = function(aIID) {
665
if (aIID.equals(Components.interfaces.nsIWebProgressListener) ||
666
// aIID.equals(Components.interfaces.nsIWebProgressListener2) ||
667
aIID.equals(Components.interfaces.nsISupportsWeakReference) ||
668
aIID.equals(Components.interfaces.nsISupports))
669
return this;
670
throw Components.results.NS_NOINTERFACE;
671
};
672
return myListener
673
}
674
JS
675
676
# Declare it here so we don't close over $lsn!
677
my $release = sub {
678
0
0
0
$_[0]->bridge->remove_callback(values %handlers)
679
if $_[0]->bridge;
680
0
};
681
0
my $lsn = $mk_nsIWebProgressListener->($obj);
682
0
$lsn->__on_destroy($release);
683
0
$lsn
684
};
685
686
687
=head2 C<< $mech->progress_listener( $source, %callbacks ) >>
688
689
my $eventlistener = progress_listener(
690
$browser,
691
onLocationChange => \&onLocationChange,
692
);
693
694
Sets up the callbacks for the C<< nsIWebProgressListener >> interface
695
to be the Perl subroutines you pass in.
696
697
C< $source > needs to support C<.addProgressListener> and C<.removeProgressListener>.
698
699
Returns a handle. Once the handle gets released, all callbacks will
700
get stopped. Also, all Perl callbacks will get deregistered from the
701
Javascript bridge, so make sure not to use the same callback
702
in different progress listeners at the same time.
703
704
=cut
705
706
sub progress_listener {
707
0
0
1
my ($self,$source,%handlers) = @_;
708
709
0
my $lsn = $self->make_progress_listener(%handlers);
710
0
$lsn->{source} = $source;
711
712
0
$lsn->__release_action('if(self.source)try{self.source.removeProgressListener(self)}catch(e){}');
713
0
my $NOTIFY_STATE = $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
714
+ $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_LOCATION')
715
+ $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS');
716
0
$source->addProgressListener($lsn,$NOTIFY_STATE);
717
0
$lsn
718
};
719
720
=head2 C<< $mech->repl() >>
721
722
my ($value,$type) = $mech->repl->expr('2+2');
723
724
Gets the L instance that is used.
725
726
This method is special to WWW::Mechanize::Firefox.
727
728
=cut
729
730
0
0
1
sub repl { $_[0]->application->repl };
731
732
=head2 C<< $mech->highlight_node( @nodes ) >>
733
734
my @links = $mech->selector('a');
735
$mech->highlight_node(@links);
736
737
Convenience method that marks all nodes in the arguments
738
with
739
740
background: red;
741
border: solid black 1px;
742
display: block; /* if the element was display: none before */
743
744
This is convenient if you need visual verification that you've
745
got the right nodes.
746
747
There currently is no way to restore the nodes to their original
748
visual state except reloading the page.
749
750
=cut
751
752
sub highlight_node {
753
0
0
1
my ($self,@nodes) = @_;
754
0
for (@nodes) {
755
0
my $style = $_->{style};
756
$style->{display} = 'block'
757
0
0
if $style->{display} eq 'none';
758
0
$style->{background} = 'red';
759
0
$style->{border} = 'solid black 1px;';
760
};
761
};
762
763
=head1 NAVIGATION METHODS
764
765
=head2 C<< $mech->get( $url, %options ) >>
766
767
$mech->get( $url, ':content_file' => $tempfile );
768
769
Retrieves the URL C into the tab.
770
771
It returns a faked L object for interface compatibility
772
with L.
773
774
Recognized options:
775
776
=over 4
777
778
=item *
779
780
C<< :content_file >> - filename to store the data in
781
782
=item *
783
784
C<< no_cache >> - if true, bypass the browser cache
785
786
=item *
787
788
C<< synchronize >> - wait until all elements have loaded
789
790
The default is to wait until all elements have loaded. You can switch
791
this off by passing
792
793
synchronize => 0
794
795
for example if you want to manually poll for an element that appears fairly
796
early during the load of a complex page.
797
798
=back
799
800
=cut
801
802
sub get {
803
0
0
1
my ($self,$url, %options) = @_;
804
0
my $b = $self->tab->{linkedBrowser};
805
0
$self->clear_current_form;
806
807
0
my $flags = 0;
808
0
0
if ($options{ no_cache }) {
809
0
$flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
810
};
811
0
0
if (! exists $options{ synchronize }) {
812
0
$options{ synchronize } = $self->events;
813
};
814
0
0
if( !ref $options{ synchronize }) {
815
$options{ synchronize } = $options{ synchronize }
816
0
0
? $self->events
817
: []
818
};
819
820
$self->_sync_call( $options{ synchronize }, sub {
821
0
0
0
if (my $target = delete $options{":content_file"}) {
822
0
$self->save_url($url => ''.$target, %options);
823
} else {
824
0
$b->loadURIWithFlags(''.$url,$flags);
825
};
826
0
});
827
};
828
829
=head2 C<< $mech->get_local( $filename , %options ) >>
830
831
$mech->get_local('test.html');
832
833
Shorthand method to construct the appropriate
834
C<< file:// >> URI and load it into Firefox. Relative
835
paths will be interpreted as relative to C<$0>.
836
837
This method accepts the same options as C<< ->get() >>.
838
839
This method is special to WWW::Mechanize::Firefox but could
840
also exist in WWW::Mechanize through a plugin.
841
842
=cut
843
844
sub get_local {
845
0
0
1
my ($self, $htmlfile, %options) = @_;
846
0
require Cwd;
847
0
require File::Spec;
848
0
my $fn = File::Spec->rel2abs(
849
File::Spec->catfile(dirname($0),$htmlfile),
850
Cwd::getcwd(),
851
);
852
0
$fn =~ s!\\!/!g; # fakey "make file:// URL"
853
854
0
$self->get("file://$fn", %options);
855
}
856
857
=head2 C<< $mech->post( $url, %options ) >>
858
859
$mech->post( 'http://example.com',
860
params => { param => "Hello World" },
861
headers => {
862
"Content-Type" => 'application/x-www-form-urlencoded',
863
},
864
charset => 'utf-8',
865
);
866
867
Sends a POST request to C<$url>.
868
869
A C header will be automatically calculated if
870
it is not given.
871
872
The following options are recognized:
873
874
=over 4
875
876
=item *
877
878
C - a hash of HTTP headers to send. If not given,
879
the content type will be generated automatically.
880
881
=item *
882
883
C - the raw data to send, if you've encoded it already.
884
885
=back
886
887
=cut
888
889
sub post {
890
0
0
1
my ($self, $url, %options) = @_;
891
0
my $b = $self->tab->{linkedBrowser};
892
0
$self->clear_current_form;
893
894
0
my $flags = 0;
895
0
0
if ($options{no_cache}) {
896
0
$flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
897
};
898
0
0
if (! exists $options{synchronize}) {
899
0
$options{synchronize} = $self->events;
900
};
901
0
0
if( !ref $options{synchronize}) {
902
$options{synchronize} = $options{synchronize}
903
0
0
? $self->events
904
: []
905
};
906
907
# If we don't have data, encode the parameters:
908
0
0
if( !$options{ data }) {
909
0
my $req= HTTP::Request::Common::POST( $url, $options{params} );
910
0
$options{ data } = $req->content;
911
};
912
913
0
0
$options{ charset } ||= 'utf-8';
914
0
0
$options{ headers } ||= {};
915
0
0
$options{ headers }->{"Content-Type"} ||= "application/x-www-form-urlencoded";
916
0
0
if( $options{ charset }) {
917
0
$options{ headers }->{"Content-Type"} .= "; charset=$options{ charset }";
918
};
919
920
0
my $streamPostData = $self->repl->declare(<<'JS');
921
function(headers, dataString) {
922
// POST method requests must wrap the encoded text in a MIME stream
923
const Cc = Components.classes;
924
const Ci = Components.interfaces;
925
var stringStream = Cc["@mozilla.org/io/string-input-stream;1"].
926
createInstance(Ci.nsIStringInputStream);
927
if ("data" in stringStream) // Gecko 1.9 or newer
928
stringStream.data = dataString;
929
else // 1.8 or older
930
stringStream.setData(dataString, dataString.length);
931
932
var postData = Cc["@mozilla.org/network/mime-input-stream;1"].
933
createInstance(Ci.nsIMIMEInputStream);
934
for( h in headers ) {
935
postData.addHeader( h, headers[h] );
936
};
937
postData.addContentLength = true;
938
postData.setData(stringStream);
939
940
return postData;
941
}
942
JS
943
944
$self->_sync_call($options{synchronize}, sub {
945
0
0
my $postData = $streamPostData->($options{headers}, $options{data});
946
0
$b->loadURIWithFlags(''.$url, $flags, undef, $options{charset}, $postData);
947
0
});
948
}
949
950
=head2 C<< $mech->add_header( $name => $value, ... ) >>
951
952
$mech->add_header(
953
'X-WWW-Mechanize-Firefox' => "I'm using it",
954
Encoding => 'text/klingon',
955
);
956
957
This method sets up custom headers that will be sent with B HTTP(S)
958
request that Firefox makes.
959
960
Using multiple instances of WWW::Mechanize::Firefox objects with the same
961
application together with changed request headers will most likely have weird
962
effects. So don't do that.
963
964
Note that currently, we only support one value per header.
965
966
Some versions of Firefox don't work with the method that is used to set
967
the custom headers. Please see C for the exact
968
versions where the implemented mechanism doesn't work. Roughly, this is
969
for versions 17 to 24 of Firefox.
970
971
=cut
972
973
# This subroutine creates the custom header observer. It has a hashref
974
# of headers that it will add to EACH request that Firefox sends out.
975
# It removes itself when the Perl object gets destroyed.
976
sub _custom_header_observer {
977
0
0
my ($self, @headers) = @_;
978
979
# This routine was taken from http://d.hatena.ne.jp/oppara/20090410/p1
980
0
my $on_modify_request = $self->repl->declare(<<'JS');
981
function() { // headers passed via arguments
982
const Cc= Components.classes;
983
const Ci= Components.interfaces;
984
const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
985
var h= [].slice.call(arguments);
986
var hr= {};
987
for( var i=0; i
988
var k= h[i];
989
var v= h[i+1];
990
hr[k]= v;
991
};
992
993
var myObserver= {
994
headers: hr,
995
observe: function(subject,topic,data) {
996
if(topic != 'http-on-modify-request') return;
997
998
var http = subject.QueryInterface(Ci.nsIHttpChannel);
999
for( var k in this.headers) {
1000
var v= this.headers[k];
1001
http.setRequestHeader(k,v, false);
1002
1003
if (k== 'Referer' && http.referrer) {
1004
http.referrer.spec = v;
1005
};
1006
};
1007
}
1008
}
1009
observerService.addObserver(myObserver,'http-on-modify-request',false);
1010
return myObserver;
1011
};
1012
JS
1013
0
my $obs = $on_modify_request->(@headers);
1014
1015
# Clean up after ourselves
1016
0
$obs->__release_action(<<'JS');
1017
const Cc= Components.classes;
1018
const Ci= Components.interfaces;
1019
const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
1020
try {
1021
observerService.removeObserver(self,'http-on-modify-request',false);
1022
} catch (e) {}
1023
JS
1024
0
return $obs;
1025
};
1026
1027
sub add_header {
1028
0
0
1
my ($self, @headers) = @_;
1029
0
0
$self->{custom_header_observer} ||= $self->_custom_header_observer;
1030
1031
# This is slooow, but we only do it when changing the headers...
1032
0
my $h = $self->{custom_header_observer}->{headers};
1033
0
while( my ($k,$v) = splice @headers, 0, 2 ) {
1034
0
$h->{$k} = $v;
1035
};
1036
};
1037
1038
=head2 C<< $mech->delete_header( $name , $name2... ) >>
1039
1040
$mech->delete_header( 'User-Agent' );
1041
1042
Removes HTTP headers from the agent's list of special headers. Note
1043
that Firefox may still send a header with its default value.
1044
1045
=cut
1046
1047
sub delete_header {
1048
0
0
1
my ($self, @headers) = @_;
1049
1050
0
0
0
if( $self->{custom_header_observer} and @headers ) {
1051
# This is slooow, but we only do it when changing the headers...
1052
0
my $h = $self->{custom_header_observer}->{headers};
1053
1054
delete $h->{$_}
1055
0
for( @headers );
1056
};
1057
};
1058
1059
=head2 C<< $mech->reset_headers >>
1060
1061
$mech->reset_headers();
1062
1063
Removes all custom headers and makes Firefox send its defaults again.
1064
1065
=cut
1066
1067
sub reset_headers {
1068
0
0
1
my ($self) = @_;
1069
0
delete $self->{custom_header_observer};
1070
};
1071
1072
sub _addLoadEventListener {
1073
0
0
my ($self,%options) = @_;
1074
1075
0
0
$options{ tab } ||= $self->tab;
1076
0
0
$options{ window } ||= $self->application->getMostRecentWindow;
1077
0
0
$options{ events } ||= $self->events;
1078
0
my $add_load_listener = $self->repl->declare(<<'JS');
1079
function( mainWindow, tab, waitForLoad, events ) {
1080
var browser= mainWindow.gBrowser.getBrowserForTab( tab );
1081
1082
var lock= {
1083
"busy": 1,
1084
"log":[],
1085
"events": events,
1086
"browser": browser,
1087
"cb": undefined,
1088
"release": function() {
1089
for(var i=0; i
1090
this.browser.removeEventListener(this.events[i], this.cb, true);
1091
};
1092
}
1093
};
1094
var unloadedFrames= [];
1095
1096
lock.cb= function (e) {
1097
var t= e.target;
1098
var toplevel= (t == browser.contentDocument);
1099
lock.log.push("Event "+e.type);
1100
var reloadedFrame= false;
1101
lock.log.push( "" + unloadedFrames.length + " frames.");
1102
1103
if( "FRAME" == t.tagName
1104
|| "IFRAME" == t.tagName ) {
1105
loc= t.src;
1106
} else if( !t.tagName ) {
1107
// Document
1108
loc= t.URL;
1109
} else { // ignore
1110
lock.log.push("Ignoring " + e.type + " on " + t.tagName);
1111
};
1112
try {
1113
if( t instanceof HTMLDocument ) {
1114
// We are only interested in HTML pages here
1115
var container= t.defaultView.frameElement || browser.contentWindow;
1116
for( var i=0; i < unloadedFrames.length; i++ ) {
1117
try {
1118
// lock.log.push( "" + i + " " + unloadedFrames[i].id + " - " + unloadedFrames[i].src );
1119
reloadedFrame= reloadedFrame
1120
|| unloadedFrames[i] === container;
1121
} catch (e) {
1122
// alert("Some frame element has gone away already...");
1123
};
1124
// alert("Caught " + e.type + " on remembered element. Great - " + reloadedFrame);
1125
};
1126
1127
if ("pagehide" == e.type && container ) {
1128
// alert("pagehide on container /lock"+lock.id);
1129
// A frame or window gets reloaded.
1130
// A frame gets reloaded. We remember it so we can
1131
// tell when it has completed. We won't get a separate
1132
// completion event on the parent document :-(
1133
lock.log.push("Remembering frame parent, for 'load' event");
1134
unloadedFrames.push( container );
1135
// Maybe we should just attach all events here?!
1136
};
1137
};
1138
} catch (e) { alert("Error while looking: " + e.message+" " + e.line) };
1139
1140
// if (! toplevel && !reloadedFrame ) { return ; };
1141
lock.log.push("<> " + e.type + " on " + loc);
1142
1143
if( (reloadedFrame)
1144
// && !waitForLoad
1145
&& "DOMContentLoaded" == e.type
1146
) {
1147
// We loaded a document
1148
// See if it contains (i)frames
1149
// and wait for "load" to fire if so
1150
// alert("Reloaded a container /lock:" + lock.id);
1151
lock.log.push("DOMContentLoaded for toplevel");
1152
var q= "//IFRAME|//FRAME";
1153
var frames= t.evaluate(q,t,null,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ).snapshotLength;
1154
lock.log.push("Found " + frames + " frames");
1155
if( frames ) {
1156
lock.log.push("Waiting for 'load' because we found frames");
1157
waitForLoad= true;
1158
} else if( /^about:neterror\?/.test( loc ) || !waitForLoad ) {
1159
lock.log.push("Early out on DOMContentLoaded");
1160
lock.busy= 0;
1161
};
1162
1163
} else if( (reloadedFrame)
1164
&& ( "load" == e.type
1165
|| "pageshow" == e.type
1166
)) { // We always are done on "load" on toplevel
1167
lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1168
lock.busy= 0;
1169
1170
} else if( (toplevel || reloadedFrame)
1171
&& ("error" == e.type || "stop" == e.type)) { // We always are done on "load" on toplevel
1172
lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1173
lock.busy= 0;
1174
};
1175
1176
};
1177
1178
for(var i=0; i
1179
browser.addEventListener(events[i], lock.cb, true);
1180
};
1181
lock.log.push("Listening");
1182
1183
return lock
1184
}
1185
JS
1186
0
return $add_load_listener->($options{ window }, $options{ tab }, 1, $options{ events } );
1187
}
1188
1189
sub _addEventListener {
1190
0
0
my ($self,@args) = @_;
1191
0
0
0
if (@args <= 2 and ref($args[0]) eq 'MozRepl::RemoteObject::Instance') {
1192
0
@args = [@args];
1193
};
1194
0
for (@args) {
1195
0
0
$_->[1] ||= $self->events;
1196
0
0
$_->[1] = [$_->[1]]
1197
unless ref $_->[1];
1198
};
1199
# Now, flatten the arg list again...
1200
0
@args = map { @$_ } @args;
0
1201
1202
# This registers multiple events for a one-shot event
1203
0
my $make_semaphore = $self->repl->declare(<<'JS');
1204
function() {
1205
var lock = { "busy": 0, "event" : null };
1206
var listeners = [];
1207
var pairs = arguments;
1208
for( var k = 0; k < pairs.length ; k++) {
1209
var b = pairs[k];
1210
k++;
1211
var events = pairs[k];
1212
1213
for( var i = 0; i < events.length; i++) {
1214
var evname = events[i];
1215
var callback = (function(listeners,evname){
1216
return function(e) {
1217
if (! lock.busy) {
1218
lock.busy++;
1219
lock.event = e.type;
1220
lock.js_event = {};
1221
lock.js_event.target = e.originalTarget;
1222
lock.js_event.type = e.type;
1223
//alert("Caught first event " + e.type + " " + e.message);
1224
} else {
1225
//alert("Caught duplicate event " + e.type + " " + e.message);
1226
};
1227
for( var j = 0; j < listeners.length; j++) {
1228
listeners[j][0].removeEventListener(listeners[j][1],listeners[j][2],true);
1229
};
1230
};
1231
})(listeners,evname);
1232
listeners.push([b,evname,callback]);
1233
b.addEventListener(evname,callback,true);
1234
};
1235
};
1236
return lock
1237
}
1238
JS
1239
# $browser,$events
1240
0
return $make_semaphore->(@args);
1241
};
1242
1243
sub _wait_while_busy {
1244
0
0
my ($self,@elements) = @_;
1245
# Now do the busy-wait
1246
# Should this also include a ->poll()
1247
# and a callback?
1248
0
my $i=0;
1249
0
while (1) {
1250
0
$i++;
1251
0
0
last if($i == 30 );
1252
0
for my $element (@elements) {
1253
0
0
0
if ((my $s = $element->{busy} || 0) < 1) {
1254
0
for my $element (@elements) {
1255
0
push @{ $self->{event_log} },
1256
0
join "\n", @{ $element->{log}};
0
1257
};
1258
0
return $element;
1259
};
1260
};
1261
0
sleep 0.1;
1262
1263
# if (time-$timer > 4) {
1264
# $timer= time;
1265
# for my $element (@elements) {
1266
# for (@{ $element->{log}}) {
1267
# print $_,"\n";
1268
# };
1269
# print "---\n";
1270
# };
1271
# };
1272
};
1273
}
1274
1275
=head2 C<< $mech->synchronize( $event, $callback ) >>
1276
1277
Wraps a synchronization semaphore around the callback
1278
and waits until the event C<$event> fires on the browser.
1279
If you want to wait for one of multiple events to occur,
1280
pass an array reference as the first parameter.
1281
1282
Usually, you want to use it like this:
1283
1284
my $l = $mech->xpath('//a[@onclick]', single => 1);
1285
$mech->synchronize('DOMFrameContentLoaded', sub {
1286
$mech->click( $l );
1287
});
1288
1289
It is necessary to synchronize with the browser whenever
1290
a click performs an action that takes longer and
1291
fires an event on the browser object.
1292
1293
The C event is fired by Firefox when
1294
the whole DOM and all C
1295
If your document doesn't have frames, use the C
1296
event instead.
1297
1298
If you leave out C<$event>, the value of C<< ->events() >> will
1299
be used instead.
1300
1301
=cut
1302
1303
sub _install_response_header_listener {
1304
0
0
my ($self) = @_;
1305
1306
0
weaken $self;
1307
1308
# Pre-Filter the progress on the JS side of things so we
1309
# don't get that much traffic back and forth between Perl and JS
1310
0
my $make_state_change_filter = $self->repl->declare(<<'JS');
1311
function (cb,console) {
1312
const STATE_START = Components.interfaces.nsIWebProgressListener.STATE_START;
1313
const STATE_STOP = Components.interfaces.nsIWebProgressListener.STATE_STOP;
1314
const STATE_TRANSFERRING = Components.interfaces.nsIWebProgressListener.STATE_TRANSFERRING;
1315
const STATE_IS_DOCUMENT = Components.interfaces.nsIWebProgressListener.STATE_IS_DOCUMENT;
1316
const STATE_IS_WINDOW = Components.interfaces.nsIWebProgressListener.STATE_IS_WINDOW;
1317
1318
return function (progress,request,flags,status) {
1319
if( 0 && console ) {
1320
const nsIChannel = Components.interfaces.nsIChannel;
1321
var ch = request.QueryInterface(nsIChannel);
1322
1323
console.log("STATE: "
1324
+ (flags & STATE_START ? "s" : "-")
1325
+ (flags & STATE_STOP ? "S" : "-")
1326
+ (flags & STATE_TRANSFERRING ? "T" : "-")
1327
+ (flags & STATE_IS_DOCUMENT ? "D" : "-")
1328
+ (flags & STATE_IS_WINDOW ? "W" : "-")
1329
+ " " + status
1330
+ " " + ch.originalURI.spec
1331
+ " -> " + ch.URI.spec
1332
);
1333
};
1334
// if (flags & (STATE_STOP|STATE_IS_WINDOW) == (STATE_STOP|STATE_IS_WINDOW)) {
1335
if (flags & (STATE_STOP|STATE_IS_DOCUMENT) == (STATE_STOP|STATE_IS_DOCUMENT)) {
1336
cb(progress,request,flags,status);
1337
} else if ((flags & STATE_STOP) == STATE_STOP) {
1338
cb(progress,request,flags,status);
1339
}
1340
}
1341
}
1342
JS
1343
1344
# These should be cached and optimized into one hash query
1345
0
my $STATE_STOP = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_STOP');
1346
0
my $STATE_IS_DOCUMENT = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_IS_DOCUMENT');
1347
0
my $STATE_IS_WINDOW = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_IS_WINDOW');
1348
1349
my $state_change = $make_state_change_filter->(sub {
1350
0
0
my ($progress,$request,$flags,$status) = @_;
1351
#warn sprintf "State : %032b %08x\n", $flags, $status;
1352
#warn sprintf " %032b\n", $STATE_STOP | $STATE_IS_DOCUMENT | $STATE_IS_WINDOW ;
1353
1354
0
0
0
if ( $STATE_STOP == $flags # some error
1355
or ($flags & ($STATE_STOP | $STATE_IS_DOCUMENT)) == ($STATE_STOP | $STATE_IS_DOCUMENT)) {
1356
0
0
if ($status == 0 ) {
1357
#warn "Storing request to response";
1358
#warn "URI ".$request->{URI}->{asciiSpec};
1359
0
0
$self->{ response } ||= $request;
1360
} else {
1361
#warn "Erasing response";
1362
0
undef $self->{ response };
1363
};
1364
};
1365
#}, $self->tab->{linkedBrowser}->{contentWindow}->{console}, $lock);
1366
0
}, $self->tab->{linkedBrowser}->{contentWindow}->{console});
1367
1368
0
my $browser = $self->tab->{linkedBrowser};
1369
1370
# These should mimick the LWP::UserAgent events maybe?
1371
0
return $self->progress_listener(
1372
$browser,
1373
onStateChange => $state_change,
1374
#onProgressChange => sub { print "Progress : @_\n" },
1375
#onLocationChange => sub { printf "Location : %s\n", $_[2]->{spec} },
1376
#onStatusChange => sub { print "Status : @_\n"; },
1377
);
1378
};
1379
1380
sub synchronize {
1381
0
0
1
my ($self,$events,$callback) = @_;
1382
0
0
0
if (ref $events and ref $events eq 'CODE') {
1383
0
$callback = $events;
1384
0
$events = $self->events;
1385
};
1386
1387
0
0
$events = [ $events ]
1388
unless ref $events;
1389
1390
0
undef $self->{response};
1391
1392
0
my $need_response = defined wantarray;
1393
0
my $response_catcher = $self->_install_response_header_listener();
1394
1395
0
my $load_lock = $self->_addLoadEventListener( tab => $self->tab, events => $events );
1396
0
$callback->();
1397
1398
0
my $ev = $self->_wait_while_busy($load_lock);
1399
0
0
if (my $h = $self->{on_event}) {
1400
0
0
if (ref $h eq 'CODE') {
1401
0
$h->($ev)
1402
} else {
1403
0
warn "Received $ev->{event}";
1404
#warn "$ev->{event}->{text}"";
1405
};
1406
};
1407
1408
# Clean up our event listener
1409
0
$load_lock->release;
1410
1411
0
undef $response_catcher;
1412
# Response catcher gets released here
1413
1414
0
$self->signal_http_status;
1415
0
0
if ($need_response) {
1416
0
return $self->response
1417
};
1418
};
1419
1420
=head2 C<< $mech->res() >> / C<< $mech->response(%options) >>
1421
1422
my $response = $mech->response(headers => 0);
1423
1424
Returns the current response as a L object.
1425
1426
The C option tells the module whether to fetch the headers
1427
from Firefox or not. This is mainly an internal optimization hack.
1428
1429
=cut
1430
1431
sub _headerVisitor {
1432
0
0
my ($self,$cb) = @_;
1433
0
my $obj = $self->repl->expr('new Object');
1434
0
$obj->{visitHeader} = $cb;
1435
0
$obj
1436
};
1437
1438
sub _extract_response {
1439
0
0
my ($self,$request,%options) = @_;
1440
1441
0
my $nsIHttpChannel = $self->repl->constant('Components.interfaces.nsIHttpChannel');
1442
0
my $httpChannel = $request->QueryInterface($nsIHttpChannel);
1443
1444
0
my @headers;
1445
0
0
if( $options{ headers }) {
1446
0
0
my $v = $self->_headerVisitor(sub{push @headers, @_});
0
1447
1448
# If this fails, we're calling it too early :-(
1449
0
$httpChannel->visitResponseHeaders($v);
1450
};
1451
1452
my $res = HTTP::Response->new(
1453
$httpChannel->{responseStatus},
1454
$httpChannel->{responseStatusText},
1455
0
\@headers,
1456
undef, # no body so far
1457
);
1458
0
return $res;
1459
};
1460
1461
sub response {
1462
0
0
1
my ($self, %options) = @_;
1463
1464
0
0
if( ! exists $options{ headers }) {
1465
0
$options{ headers } = 1;
1466
};
1467
1468
# If we still have a valid JS response,
1469
# create a HTTP::Response from that
1470
0
0
if (my $js_res = $self->{ response }) {
1471
#my $ouri = $js_res->{originalURI};
1472
0
my $ouri = $js_res->{URI};
1473
0
my $scheme = '';
1474
#warn "Reading response for ".$js_res->{URI}->{asciiSpec};
1475
#warn " original ".$js_res->{originalURI}->{asciiSpec};
1476
0
0
if ($ouri) {
1477
0
$scheme = $ouri->{scheme};
1478
};
1479
1480
0
0
0
if ($scheme and $scheme =~ /^https?/) {
0
0
1481
# We can only extract from a HTTP Response
1482
0
return $self->_extract_response( $js_res, %options );
1483
} elsif ($scheme and $scheme =~ /^(file|data|about)\b/) {
1484
# We're cool!
1485
0
return HTTP::Response->new( 200, '', ['Content-Encoding','UTF-8'], encode 'UTF-8' => $self->content);
1486
} else {
1487
# We'll make up a response, below
1488
#my $url = $self->document->{documentURI};
1489
#carp "Making up a response for unknown URL scheme '$scheme' (from '$url')";
1490
};
1491
};
1492
1493
# Otherwise, make up a reason:
1494
0
my $eff_url = $self->document->{documentURI};
1495
#warn $eff_url;
1496
0
0
if ($eff_url =~ /^about:neterror/) {
1497
# this is an error
1498
0
return HTTP::Response->new(500)
1499
};
1500
1501
# We're cool, except we don't know what we're doing here:
1502
0
return HTTP::Response->new( 200, '', ['Content-Encoding','UTF-8'], encode 'UTF-8' => $self->content);
1503
}
1504
*res = \&response;
1505
1506
=head2 C<< $mech->success() >>
1507
1508
$mech->get('http://google.com');
1509
print "Yay"
1510
if $mech->success();
1511
1512
Returns a boolean telling whether the last request was successful.
1513
If there hasn't been an operation yet, returns false.
1514
1515
This is a convenience function that wraps C<< $mech->res->is_success >>.
1516
1517
=cut
1518
1519
sub success {
1520
0
0
1
my $res = $_[0]->response( headers => 0 );
1521
0
0
$res and $res->is_success
1522
}
1523
1524
=head2 C<< $mech->status() >>
1525
1526
$mech->get('http://google.com');
1527
print $mech->status();
1528
# 200
1529
1530
Returns the HTTP status code of the response.
1531
This is a 3-digit number like 200 for OK, 404 for not found, and so on.
1532
1533
=cut
1534
1535
sub status {
1536
0
0
1
my ($self) = @_;
1537
0
return $self->response( headers => 0 )->code
1538
};
1539
1540
=head2 C<< $mech->reload( [$bypass_cache] ) >>
1541
1542
$mech->reload();
1543
1544
Reloads the current page. If C<$bypass_cache>
1545
is a true value, the browser is not allowed to
1546
use a cached page. This is the difference between
1547
pressing C (cached) and C (uncached).
1548
1549
Returns the (new) response.
1550
1551
=cut
1552
1553
sub reload {
1554
0
0
1
my ($self, $bypass_cache) = @_;
1555
0
0
$bypass_cache ||= 0;
1556
0
0
if ($bypass_cache) {
1557
0
$bypass_cache = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
1558
};
1559
$self->synchronize( sub {
1560
0
0
$self->tab->{linkedBrowser}->reloadWithFlags($bypass_cache);
1561
0
});
1562
}
1563
1564
# Internal convenience method for dipatching a call either synchronized
1565
# or not
1566
sub _sync_call {
1567
0
0
my ($self, $events, $cb) = @_;
1568
1569
0
0
if (@$events) {
1570
0
$self->synchronize( $events, $cb );
1571
} else {
1572
0
$cb->();
1573
};
1574
};
1575
1576
=head2 C<< $mech->back( [$synchronize] ) >>
1577
1578
$mech->back();
1579
1580
Goes one page back in the page history.
1581
1582
Returns the (new) response.
1583
1584
=cut
1585
1586
sub back {
1587
0
0
1
my ($self, $synchronize) = @_;
1588
0
0
$synchronize ||= (@_ != 2);
1589
0
0
if( !ref $synchronize ) {
1590
0
0
$synchronize = $synchronize
1591
? $self->events
1592
: []
1593
};
1594
1595
$self->_sync_call($synchronize, sub {
1596
0
0
$self->tab->{linkedBrowser}->goBack;
1597
0
});
1598
}
1599
1600
=head2 C<< $mech->forward( [$synchronize] ) >>
1601
1602
$mech->forward();
1603
1604
Goes one page forward in the page history.
1605
1606
Returns the (new) response.
1607
1608
=cut
1609
1610
sub forward {
1611
0
0
1
my ($self, $synchronize) = @_;
1612
0
0
$synchronize ||= (@_ != 2);
1613
0
0
if( !ref $synchronize ) {
1614
0
0
$synchronize = $synchronize
1615
? $self->events
1616
: []
1617
};
1618
1619
$self->_sync_call($synchronize, sub {
1620
0
0
$self->tab->{linkedBrowser}->goForward;
1621
0
});
1622
}
1623
1624
=head2 C<< $mech->uri() >>
1625
1626
print "We are at " . $mech->uri;
1627
1628
Returns the current document URI.
1629
1630
=cut
1631
1632
sub uri {
1633
0
0
1
my ($self) = @_;
1634
0
my $loc = $self->tab->MozRepl::RemoteObject::Methods::dive(qw[
1635
linkedBrowser
1636
currentURI
1637
asciiSpec ]);
1638
0
return URI->new( $loc );
1639
};
1640
1641
=head1 CONTENT METHODS
1642
1643
=head2 C<< $mech->document() >>
1644
1645
Returns the DOM document object.
1646
1647
This is WWW::Mechanize::Firefox specific.
1648
1649
=cut
1650
1651
sub document {
1652
0
0
1
my ($self) = @_;
1653
#$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser contentWindow document]);
1654
0
$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser contentDocument]);
1655
}
1656
1657
=head2 C<< $mech->docshell() >>
1658
1659
my $ds = $mech->docshell;
1660
1661
Returns the C Javascript object associated with the tab.
1662
1663
This is WWW::Mechanize::Firefox specific.
1664
1665
=cut
1666
1667
sub docshell {
1668
0
0
1
my ($self) = @_;
1669
0
$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser docShell]);
1670
}
1671
1672
=head2 C<< $mech->content( %options ) >>
1673
1674
print $mech->content;
1675
print $mech->content( format => 'html' ); # default
1676
print $mech->content( format => 'text' ); # identical to ->text
1677
1678
This always returns the content as a Unicode string. It tries
1679
to decode the raw content according to its input encoding.
1680
This currently only works for HTML pages, not for images etc.
1681
1682
Recognized options:
1683
1684
=over 4
1685
1686
=item *
1687
1688
C - the document to use.
1689
1690
Default is C<< $self->document >>.
1691
1692
=item *
1693
1694
C - the stuff to return
1695
1696
The allowed values are C and C. The default is C.
1697
1698
=back
1699
1700
=cut
1701
1702
sub content {
1703
0
0
1
my ($self, %options) = @_;
1704
0
0
$options{ format } ||= 'html';
1705
1706
0
0
my $d = delete $options{ document } || $self->document; # keep a reference to it!
1707
0
0
my $format = delete $options{ format } || 'html';
1708
0
my $content;
1709
1710
0
0
if( $format eq 'html' ) {
0
1711
0
my $html = $self->repl->declare(<<'JS', 'list');
1712
function(d){
1713
var e = d.createElement("div");
1714
e.appendChild(d.documentElement.cloneNode(true));
1715
return [e.innerHTML,d.inputEncoding];
1716
}
1717
JS
1718
# We return the raw bytes here.
1719
0
($content,my $encoding) = $html->($d);
1720
0
0
if (! utf8::is_utf8($content)) {
1721
#warn "Switching on UTF-8 (from $encoding)";
1722
# Switch on UTF-8 flag
1723
# This should never happen, as JSON::XS (and JSON) should always
1724
# already return proper UTF-8
1725
# But it does happen.
1726
0
$content = Encode::decode($encoding, $content);
1727
};
1728
} elsif ( $format eq 'text' ) {
1729
0
$content = $self->text;
1730
}
1731
else {
1732
0
$self->die( qq{Unknown "format" parameter "$format"} );
1733
}
1734
1735
0
return $content
1736
};
1737
1738
=head2 C<< $mech->text() >>
1739
1740
Returns the text of the current HTML content. If the content isn't
1741
HTML, $mech will die.
1742
1743
=cut
1744
1745
sub text {
1746
0
0
1
my $self = shift;
1747
1748
# Waugh - this is highly inefficient but conveniently short to write
1749
# Maybe this should skip SCRIPT nodes...
1750
0
join '', map { $_->{nodeValue} } $self->xpath('//*/text()');
0
1751
}
1752
1753
1754
=head2 C<< $mech->content_encoding() >>
1755
1756
print "The content is encoded as ", $mech->content_encoding;
1757
1758
Returns the encoding that the content is in. This can be used
1759
to convert the content from UTF-8 back to its native encoding.
1760
1761
=cut
1762
1763
sub content_encoding {
1764
0
0
1
my ($self, $d) = @_;
1765
0
0
$d ||= $self->document; # keep a reference to it!
1766
0
return $d->{inputEncoding};
1767
};
1768
1769
=head2 C<< $mech->update_html( $html ) >>
1770
1771
$mech->update_html($html);
1772
1773
Writes C<$html> into the current document. This is mostly
1774
implemented as a convenience method for L.
1775
1776
=cut
1777
1778
sub update_html {
1779
0
0
1
my ($self,$content) = @_;
1780
0
my $url = URI->new('data:');
1781
0
$url->media_type("text/html");
1782
0
$url->data($content);
1783
$self->synchronize($self->events, sub {
1784
0
0
$self->tab->{linkedBrowser}->loadURI("$url");
1785
0
});
1786
return
1787
0
};
1788
1789
=head2 C<< $mech->save_content( $localname [, $resource_directory] [, %options ] ) >>
1790
1791
$mech->get('http://google.com');
1792
$mech->save_content('google search page','google search page files');
1793
1794
Saves the given URL to the given filename. The URL will be
1795
fetched from the cache if possible, avoiding unnecessary network
1796
traffic.
1797
1798
If C<$resource_directory> is given, the whole page will be saved.
1799
All CSS, subframes and images
1800
will be saved into that directory, while the page HTML itself will
1801
still be saved in the file pointed to by C<$localname>.
1802
1803
Returns a C object through which you can cancel the
1804
download by calling its C<< ->cancelSave >> method. Also, you can poll
1805
the download status through the C<< ->{currentState} >> property.
1806
1807
If you need to set persist flags pass the unsigned long value in the
1808
C option.
1809
1810
$mech->get('http://zombisoft.com');
1811
$mech->save_content('Zombisoft','zombisoft-resource-files', "persist" => 512 | 2048);
1812
1813
A list of flags and their values can be found at
1814
L.
1815
1816
If you are interested in the intermediate download progress, create
1817
a ProgressListener through C<< $mech->progress_listener >>
1818
and pass it in the C option.
1819
1820
The download will
1821
continue in the background. It will not show up in the
1822
Download Manager.
1823
1824
=cut
1825
1826
sub save_content {
1827
0
0
1
my ($self,$localname,$resource_directory,%options) = @_;
1828
1829
0
$localname = File::Spec->rel2abs($localname, '.');
1830
# Touch the file
1831
0
0
if (! -f $localname) {
1832
0
0
open my $fh, '>', $localname
1833
or die "Couldn't create '$localname': $!";
1834
};
1835
1836
0
0
if ($resource_directory) {
1837
0
$resource_directory = File::Spec->rel2abs($resource_directory, '.');
1838
1839
# Create the directory
1840
0
0
if (! -d $resource_directory) {
1841
0
0
mkdir $resource_directory
1842
or die "Couldn't create '$resource_directory': $!";
1843
};
1844
};
1845
1846
0
my $transfer_file = $self->repl->declare(<<'JS');
1847
function (document,filetarget,rscdir,progress,persistflags) {
1848
//new file object
1849
var obj_target;
1850
if (filetarget) {
1851
obj_target = Components.classes["@mozilla.org/file/local;1"]
1852
.createInstance(Components.interfaces.nsILocalFile);
1853
};
1854
1855
//set file with path
1856
obj_target.initWithPath(filetarget);
1857
1858
var obj_rscdir;
1859
if (rscdir) {
1860
obj_rscdir = Components.classes["@mozilla.org/file/local;1"]
1861
.createInstance(Components.interfaces.nsILocalFile);
1862
obj_rscdir.initWithPath(rscdir);
1863
};
1864
1865
var obj_Persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
1866
.createInstance(Components.interfaces.nsIWebBrowserPersist);
1867
1868
// with persist flags if desired
1869
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
1870
const flags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES;
1871
obj_Persist.persistFlags = flags | nsIWBP.PERSIST_FLAGS_FROM_CACHE
1872
| nsIWBP["PERSIST_FLAGS_FORCE_ALLOW_COOKIES"]
1873
| persistflags
1874
;
1875
1876
obj_Persist.progressListener = progress;
1877
1878
//save file to target
1879
obj_Persist.saveDocument(document,obj_target, obj_rscdir, null,0,0);
1880
return obj_Persist
1881
};
1882
JS
1883
#warn "=> $localname / $resource_directory";
1884
$transfer_file->(
1885
$self->document,
1886
$localname,
1887
$resource_directory,
1888
$options{progress},
1889
$options{persist}
1890
0
);
1891
}
1892
1893
=head2 C<< $mech->save_url( $url, $localname, [%options] ) >>
1894
1895
$mech->save_url('http://google.com','google_index.html');
1896
1897
Saves the given URL to the given filename. The URL will be
1898
fetched from the cache if possible, avoiding unnecessary network
1899
traffic.
1900
1901
If you are interested in the intermediate download progress, create
1902
a ProgressListener through C<< $mech->progress_listener >>
1903
and pass it in the C option.
1904
The download will
1905
continue in the background. It will also not show up in the
1906
Download Manager.
1907
1908
If the C option is not passed in, C< ->save_url >
1909
will only return after the download has finished.
1910
1911
Returns a C object through which you can cancel the
1912
download by calling its C<< ->cancelSave >> method. Also, you can poll
1913
the download status through the C<< ->{currentState} >> property.
1914
1915
=cut
1916
1917
sub save_url {
1918
0
0
1
my ($self,$url,$localname,%options) = @_;
1919
1920
0
$localname = File::Spec->rel2abs($localname, '.');
1921
1922
0
0
if (! -f $localname) {
1923
0
0
open my $fh, '>', $localname
1924
or die "Couldn't create '$localname': $!";
1925
};
1926
1927
0
my $res;
1928
0
0
if( ! $options{ progress }) {
1929
0
$options{ wait } = 1;
1930
# We will do a synchronous download
1931
0
my $STATE_FINISHED = $self->repl->constant('Components.interfaces.nsIWebBrowserPersist.PERSIST_STATE_FINISHED');
1932
$options{ progress }= $self->make_progress_listener(onStateChange => sub {
1933
0
0
my ($webprogress,$request,$flags,$status) = @_;
1934
0
0
if( $res->{currentState} == $STATE_FINISHED) {
1935
0
$options{ wait }= 0;
1936
};
1937
},
1938
# onProgressChange => sub {
1939
# my ($aWebProgress, $aRequest, $aCurSelfProgress, $aMaxSelfProgress, $aCurTotalProgress, $aMaxTotalProgress)= @_;
1940
#diag sprintf "%03.2f", $aCurTotalProgress / ($aMaxTotalProgress||1) * 100;
1941
#}
1942
0
);
1943
};
1944
1945
0
my $transfer_file = $self->repl->declare(<<'JS');
1946
function (source,filetarget,progress,tab) {
1947
//new obj_URI object
1948
var ios = Components.classes["@mozilla.org/network/io-service;1"]
1949
.getService(Components.interfaces.nsIIOService)
1950
var obj_URI = ios.newURI(source, null, null);
1951
1952
//new file object
1953
var obj_target;
1954
if (filetarget) {
1955
obj_target = Components.classes["@mozilla.org/file/local;1"]
1956
.createInstance(Components.interfaces.nsILocalFile);
1957
};
1958
1959
//set file with path
1960
obj_target.initWithPath(filetarget);
1961
1962
//new persistence object
1963
var obj_Persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
1964
.createInstance(Components.interfaces.nsIWebBrowserPersist);
1965
1966
// with persist flags if desired
1967
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
1968
const flags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES;
1969
// Also make it send the proper cookies
1970
// If we are on a 3.0 Firefox, PERSIST_FLAGS_FORCE_ALLOW_COOKIES does
1971
// not exist, so we need to get creative:
1972
1973
obj_Persist.persistFlags = flags | nsIWBP.PERSIST_FLAGS_FROM_CACHE
1974
| nsIWBP["PERSIST_FLAGS_FORCE_ALLOW_COOKIES"]
1975
;
1976
obj_Persist.progressListener = progress;
1977
/* {
1978
"onStateChange": function() {
1979
var myargs= Array.slice(arguments);
1980
alert("onStateChange (" + myargs.join(",")+")");
1981
try {
1982
progress.onStateChange.apply(null,arguments);
1983
} catch(e) {
1984
alert(e.message);
1985
};
1986
},
1987
"onProgressChange": function() {
1988
var myargs= Array.slice(arguments);
1989
alert("onProgressChange (" + myargs.join(",")+")");
1990
try {
1991
progress.onProgressChange.apply(null,arguments);
1992
} catch(e) {
1993
alert(e.message);
1994
};
1995
}
1996
};
1997
*/
1998
1999
// Since Firefox 18, we need to provide a proper privacyContext
2000
// This is cobbled together from half-documented parts in various places
2001
// of the Mozilla documentation. The changes file does not list the
2002
// necessary steps :-(
2003
// https://developer.mozilla.org/en-US/docs/Supporting_per-window_private_browsing
2004
// The documentation is even wrong. It recommends to import("chrome://gre/modules/PrivateBrowsingUtils.jsm")
2005
// but the correct URL is "resource://gre/modules/PrivateBrowsingUtils.jsm".
2006
// Also, the method is not named "getPrivacyContextFromWindow" but "privacyContextFromWindow".
2007
var privacyContext;
2008
var version = Components.classes["@mozilla.org/xre/app-info;1"]
2009
.getService(Components.interfaces.nsIXULAppInfo).version;
2010
if( version >= 18.0 ) {
2011
Components.utils.import("resource://gre/modules/PrivateBrowsingUtils.jsm");
2012
privacyContext = PrivateBrowsingUtils.privacyContextFromWindow(tab.linkedBrowser.contentDocument.defaultView);
2013
};
2014
2015
//save file to target
2016
if( version < 36.0 ) {
2017
obj_Persist.saveURI(obj_URI,null,null,null,null,obj_target,privacyContext);
2018
} else {
2019
obj_Persist.saveURI(obj_URI,null,null, ios.referrerPolicy, null,null,obj_target,privacyContext);
2020
}
2021
return obj_Persist
2022
};
2023
JS
2024
2025
0
$res= $transfer_file->("$url" => $localname, $options{progress}, $self->tab);
2026
0
while( $options{ wait }) {
2027
0
$self->repl->poll;
2028
sleep 1
2029
0
0
if $options{ wait };
2030
};
2031
0
$res
2032
}
2033
2034
=head2 C<< $mech->base() >>
2035
2036
print $mech->base;
2037
2038
Returns the URL base for the current page.
2039
2040
The base is either specified through a C
2041
tag or is the current URL.
2042
2043
This method is specific to WWW::Mechanize::Firefox
2044
2045
=cut
2046
2047
sub base {
2048
0
0
1
my ($self) = @_;
2049
0
(my $base) = $self->selector('base');
2050
$base = $base->{href}
2051
0
0
if $base;
2052
0
0
$base ||= $self->uri;
2053
};
2054
2055
=head2 C<< $mech->content_type() >>
2056
2057
=head2 C<< $mech->ct() >>
2058
2059
print $mech->content_type;
2060
2061
Returns the content type of the currently loaded document
2062
2063
=cut
2064
2065
sub content_type {
2066
0
0
1
my ($self) = @_;
2067
0
return $self->document->{contentType};
2068
};
2069
2070
*ct = \&content_type;
2071
2072
=head2 C<< $mech->is_html() >>
2073
2074
print $mech->is_html();
2075
2076
Returns true/false on whether our content is HTML, according to the
2077
HTTP headers.
2078
2079
=cut
2080
2081
sub is_html {
2082
0
0
1
my $self = shift;
2083
0
0
return defined $self->ct && ($self->ct eq 'text/html');
2084
}
2085
2086
=head2 C<< $mech->title() >>
2087
2088
print "We are on page " . $mech->title;
2089
2090
Returns the current document title.
2091
2092
=cut
2093
2094
sub title {
2095
0
0
1
my ($self) = @_;
2096
0
return $self->document->{title};
2097
};
2098
2099
=head1 EXTRACTION METHODS
2100
2101
=head2 C<< $mech->links() >>
2102
2103
print $_->text . " -> " . $_->url . "\n"
2104
for $mech->links;
2105
2106
Returns all links in the document as L objects.
2107
2108
Currently accepts no parameters. See C<< ->xpath >>
2109
or C<< ->selector >> when you want more control.
2110
2111
=cut
2112
2113
%link_spec = (
2114
a => { url => 'href', },
2115
area => { url => 'href', },
2116
frame => { url => 'src', },
2117
iframe => { url => 'src', },
2118
link => { url => 'href', },
2119
meta => { url => 'content', xpath => (join '',
2120
q{translate(@http-equiv,'ABCDEFGHIJKLMNOPQRSTUVWXYZ',},
2121
q{'abcdefghijklmnopqrstuvwxyz')="refresh"}), },
2122
);
2123
2124
# taken from WWW::Mechanize. This should possibly just be reused there
2125
sub make_link {
2126
0
0
0
my ($self,$node,$base) = @_;
2127
0
my $tag = lc $node->{tagName};
2128
2129
0
0
if (! exists $link_spec{ $tag }) {
2130
0
warn "Unknown tag '$tag'";
2131
};
2132
0
my $url = $node->{ $link_spec{ $tag }->{url} };
2133
2134
0
0
if ($tag eq 'meta') {
2135
0
my $content = $url;
2136
0
0
if ( $content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i ) {
2137
0
$url = $1;
2138
0
0
$url =~ s/^"(.+)"$/$1/ or $url =~ s/^'(.+)'$/$1/;
2139
}
2140
else {
2141
0
undef $url;
2142
}
2143
};
2144
2145
0
0
if (defined $url) {
2146
my $res = WWW::Mechanize::Link->new({
2147
tag => $tag,
2148
name => $node->{name},
2149
base => $base,
2150
url => $url,
2151
text => $node->{innerHTML},
2152
0
attrs => {},
2153
});
2154
2155
0
$res
2156
} else {
2157
()
2158
0
};
2159
}
2160
2161
sub links {
2162
0
0
1
my ($self) = @_;
2163
0
my @links = $self->selector( join ",", sort keys %link_spec);
2164
0
my $base = $self->base;
2165
return map {
2166
0
$self->make_link($_,$base)
0
2167
} @links;
2168
};
2169
2170
# Call croak or carp, depending on the C< autodie > setting
2171
sub signal_condition {
2172
0
0
0
my ($self,$msg) = @_;
2173
0
0
if ($self->{autodie}) {
2174
0
croak $msg
2175
} else {
2176
0
carp $msg
2177
}
2178
};
2179
2180
# Call croak on the C< autodie > setting if we have a non-200 status
2181
sub signal_http_status {
2182
0
0
0
my ($self) = @_;
2183
0
0
if ($self->{autodie}) {
2184
0
0
if ($self->status !~ /^2/) {
2185
# there was an error
2186
0
0
croak ($self->response(headers => 0)->message || sprintf "Got status code %d", $self->status );
2187
};
2188
} else {
2189
# silent
2190
}
2191
};
2192
2193
=head2 C<< $mech->find_link_dom( %options ) >>
2194
2195
print $_->{innerHTML} . "\n"
2196
for $mech->find_link_dom( text_contains => 'CPAN' );
2197
2198
A method to find links, like L's
2199
C<< ->find_links >> method. This method returns DOM objects from
2200
Firefox instead of WWW::Mechanize::Link objects.
2201
2202
Note that Firefox
2203
might have reordered the links or frame links in the document
2204
so the absolute numbers passed via C
2205
might not be the same between
2206
L and L.
2207
2208
Returns the DOM object as L::Instance.
2209
2210
The supported options are:
2211
2212
=over 4
2213
2214
=item *
2215
2216
C<< text >> and C<< text_contains >> and C<< text_regex >>
2217
2218
Match the text of the link as a complete string, substring or regular expression.
2219
2220
Matching as a complete string or substring is a bit faster, as it is
2221
done in the XPath engine of Firefox.
2222
2223
=item *
2224
2225
C<< id >> and C<< id_contains >> and C<< id_regex >>
2226
2227
Matches the C attribute of the link completely or as part
2228
2229
=item *
2230
2231
C<< name >> and C<< name_contains >> and C<< name_regex >>
2232
2233
Matches the C attribute of the link
2234
2235
=item *
2236
2237
C<< url >> and C<< url_regex >>
2238
2239
Matches the URL attribute of the link (C, C or C).
2240
2241
=item *
2242
2243
C<< class >> - the C attribute of the link
2244
2245
=item *
2246
2247
C<< n >> - the (1-based) index. Defaults to returning the first link.
2248
2249
=item *
2250
2251
C<< single >> - If true, ensure that only one element is found. Otherwise croak
2252
or carp, depending on the C parameter.
2253
2254
=item *
2255
2256
C<< one >> - If true, ensure that at least one element is found. Otherwise croak
2257
or carp, depending on the C parameter.
2258
2259
The method Cs if no link is found. If the C option is true,
2260
it also Cs when more than one link is found.
2261
2262
=back
2263
2264
=cut
2265
2266
80
80
399
use vars '%xpath_quote';
80
104
80
330733
2267
%xpath_quote = (
2268
'"' => '\"',
2269
#"'" => "\\'",
2270
#'[' => '[',
2271
#']' => ']',
2272
#'[' => '[\[]',
2273
#'[' => '\[',
2274
#']' => '[\]]',
2275
);
2276
2277
# Return the default limiter if no other limiting option is set:
2278
sub _default_limiter {
2279
0
0
my ($default, $options) = @_;
2280
0
0
if (! grep { exists $options->{ $_ } } qw(single one maybe all any)) {
0
2281
0
$options->{ $default } = 1;
2282
};
2283
return ()
2284
0
};
2285
2286
sub quote_xpath($) {
2287
0
0
0
local $_ = $_[0];
2288
0
0
s/(['"\[\]])/$xpath_quote{$1} || $1/ge;
0
2289
0
$_
2290
};
2291
2292
#sub perl_regex_to_xpath($) {
2293
# my ($re) = @_;
2294
# my $flags = '';
2295
# warn $re;
2296
# $re =~ s!^\(\?([a-z]*)\-[a-z]*:(.*)\)$!$2!
2297
# and $flags = $1;
2298
# warn qq{=> XPATH: "$re" , "$flags"};
2299
# ($re, $flags)
2300
#};
2301
2302
sub find_link_dom {
2303
0
0
1
my ($self,%opts) = @_;
2304
0
my %xpath_options;
2305
2306
0
for (qw(node document frames)) {
2307
# Copy over XPath options that were passed in
2308
0
0
if (exists $opts{ $_ }) {
2309
0
$xpath_options{ $_ } = delete $opts{ $_ };
2310
};
2311
};
2312
2313
0
my $single = delete $opts{ single };
2314
0
0
my $one = delete $opts{ one } || $single;
2315
0
0
0
if ($single and exists $opts{ n }) {
2316
0
croak "It doesn't make sense to use 'single' and 'n' option together"
2317
};
2318
0
0
my $n = (delete $opts{ n } || 1);
2319
0
0
$n--
2320
if ($n ne 'all'); # 1-based indexing
2321
0
my @spec;
2322
2323
# Decode text and text_contains into XPath
2324
0
for my $lvalue (qw( text id name class )) {
2325
0
my %lefthand = (
2326
text => 'text()',
2327
);
2328
0
my %match_op = (
2329
'' => q{%s="%s"},
2330
'contains' => q{contains(%s,"%s")},
2331
# Ideally we would also handle *_regex here, but Firefox XPath
2332
# does not support fn:matches() :-(
2333
#'regex' => q{matches(%s,"%s","%s")},
2334
);
2335
0
0
my $lhs = $lefthand{ $lvalue } || '@'.$lvalue;
2336
0
for my $op (keys %match_op) {
2337
0
my $v = $match_op{ $op };
2338
0
0
$op = '_'.$op if length($op);
2339
0
my $key = "${lvalue}$op";
2340
2341
0
0
if (exists $opts{ $key }) {
2342
0
my $p = delete $opts{ $key };
2343
0
push @spec, sprintf $v, $lhs, $p;
2344
};
2345
};
2346
};
2347
2348
0
0
if (my $p = delete $opts{ url }) {
2349
0
push @spec, sprintf '@href = "%s" or @src="%s"', quote_xpath $p, quote_xpath $p;
2350
}
2351
0
my @tags = (sort keys %link_spec);
2352
0
0
if (my $p = delete $opts{ tag }) {
2353
0
@tags = $p;
2354
};
2355
0
0
if (my $p = delete $opts{ tag_regex }) {
2356
0
@tags = grep /$p/, @tags;
2357
};
2358
2359
my $q = join '|',
2360
map {
2361
0
my @full = map {qq{($_)}} grep {defined} (@spec, $link_spec{$_}->{xpath});
0
0
0
2362
0
0
if (@full) {
2363
0
sprintf "//%s[%s]", $_, join " and ", @full;
2364
} else {
2365
0
sprintf "//%s", $_
2366
};
2367
} (@tags);
2368
#warn $q;
2369
2370
0
my @res = $self->xpath($q, %xpath_options );
2371
2372
0
0
if (keys %opts) {
2373
# post-filter the remaining links through WWW::Mechanize
2374
# for all the options we don't support with XPath
2375
2376
0
my $base = $self->base;
2377
0
require WWW::Mechanize;
2378
@res = grep {
2379
0
WWW::Mechanize::_match_any_link_parms($self->make_link($_,$base),\%opts)
0
2380
} @res;
2381
};
2382
2383
0
0
if ($one) {
2384
0
0
if (0 == @res) { $self->signal_condition( "No link found matching '$q'" )};
0
2385
0
0
if ($single) {
2386
0
0
if (1 < @res) {
2387
0
$self->highlight_node(@res);
2388
0
$self->signal_condition(
2389
sprintf "%d elements found found matching '%s'", scalar @res, $q
2390
);
2391
};
2392
};
2393
};
2394
2395
0
0
if ($n eq 'all') {
2396
return @res
2397
0
};
2398
0
$res[$n]
2399
}
2400
2401
=head2 C<< $mech->find_link( %options ) >>
2402
2403
print $_->text . "\n"
2404
for $mech->find_link( text_contains => 'CPAN' );
2405
2406
A method quite similar to L's method.
2407
The options are documented in C<< ->find_link_dom >>.
2408
2409
Returns a L object.
2410
2411
This defaults to not look through child frames.
2412
2413
=cut
2414
2415
sub find_link {
2416
0
0
1
my ($self,%opts) = @_;
2417
0
my $base = $self->base;
2418
croak "Option 'all' not available for ->find_link. Did you mean to call ->find_all_links()?"
2419
0
0
0
if 'all' eq ($opts{n} || '');
2420
0
0
if (my $link = $self->find_link_dom(frames => 0, %opts)) {
2421
0
return $self->make_link($link, $base)
2422
} else {
2423
return
2424
0
};
2425
};
2426
2427
=head2 C<< $mech->find_all_links( %options ) >>
2428
2429
print $_->text . "\n"
2430
for $mech->find_all_links( text_regex => qr/google/i );
2431
2432
Finds all links in the document.
2433
The options are documented in C<< ->find_link_dom >>.
2434
2435
Returns them as list or an array reference, depending
2436
on context.
2437
2438
This defaults to not look through child frames.
2439
2440
=cut
2441
2442
sub find_all_links {
2443
0
0
1
my ($self, %opts) = @_;
2444
0
$opts{ n } = 'all';
2445
0
my $base = $self->base;
2446
my @matches = map {
2447
0
$self->make_link($_, $base);
0
2448
} $self->find_all_links_dom( frames => 0, %opts );
2449
0
0
return @matches if wantarray;
2450
0
return \@matches;
2451
};
2452
2453
=head2 C<< $mech->find_all_links_dom %options >>
2454
2455
print $_->{innerHTML} . "\n"
2456
for $mech->find_all_links_dom( text_regex => qr/google/i );
2457
2458
Finds all matching linky DOM nodes in the document.
2459
The options are documented in C<< ->find_link_dom >>.
2460
2461
Returns them as list or an array reference, depending
2462
on context.
2463
2464
This defaults to not look through child frames.
2465
2466
=cut
2467
2468
sub find_all_links_dom {
2469
0
0
1
my ($self,%opts) = @_;
2470
0
$opts{ n } = 'all';
2471
0
my @matches = $self->find_link_dom( frames => 0, %opts );
2472
0
0
return @matches if wantarray;
2473
0
return \@matches;
2474
};
2475
2476
=head2 C<< $mech->follow_link( $link ) >>
2477
2478
=head2 C<< $mech->follow_link( %options ) >>
2479
2480
$mech->follow_link( xpath => '//a[text() = "Click here!"]' );
2481
2482
Follows the given link. Takes the same parameters that C
2483
uses. In addition, C can be passed to (not) force
2484
waiting for a new page to be loaded.
2485
2486
Note that C<< ->follow_link >> will only try to follow link-like
2487
things like C tags.
2488
2489
=cut
2490
2491
sub follow_link {
2492
0
0
1
my ($self,$link,%opts);
2493
0
0
if (@_ == 2) { # assume only a link parameter
2494
0
($self,$link) = @_;
2495
0
$self->click($link);
2496
} else {
2497
0
($self,%opts) = @_;
2498
0
_default_limiter( one => \%opts );
2499
0
$link = $self->find_link_dom(%opts);
2500
0
$self->click({ dom => $link, %opts });
2501
}
2502
}
2503
2504
=head2 C<< $mech->xpath( $query, %options ) >>
2505
2506
my $link = $mech->xpath('//a[id="clickme"]', one => 1);
2507
# croaks if there is no link or more than one link found
2508
2509
my @para = $mech->xpath('//p');
2510
# Collects all paragraphs
2511
2512
my @para_text = $mech->xpath('//p/text()', type => $mech->xpathResult('STRING_TYPE'));
2513
# Collects all paragraphs as text
2514
2515
Runs an XPath query in Firefox against the current document.
2516
2517
If you need more information about the returned results,
2518
use the C<< ->xpathEx() >> function.
2519
2520
The options allow the following keys:
2521
2522
=over 4
2523
2524
=item *
2525
2526
C<< document >> - document in which the query is to be executed. Use this to
2527
search a node within a specific subframe of C<< $mech->document >>.
2528
2529
=item *
2530
2531
C<< frames >> - if true, search all documents in all frames and iframes.
2532
This may or may not conflict with C. This will default to the
2533
C setting of the WWW::Mechanize::Firefox object.
2534
2535
=item *
2536
2537
C<< node >> - node relative to which the query is to be executed. Note
2538
that you will have to use a relative XPath expression as well. Use
2539
2540
.//foo
2541
2542
instead of
2543
2544
//foo
2545
2546
=item *
2547
2548
C<< single >> - If true, ensure that only one element is found. Otherwise croak
2549
or carp, depending on the C parameter.
2550
2551
=item *
2552
2553
C<< one >> - If true, ensure that at least one element is found. Otherwise croak
2554
or carp, depending on the C parameter.
2555
2556
=item *
2557
2558
C<< maybe >> - If true, ensure that at most one element is found. Otherwise
2559
croak or carp, depending on the C parameter.
2560
2561
=item *
2562
2563
C<< all >> - If true, return all elements found. This is the default.
2564
You can use this option if you want to use C<< ->xpath >> in scalar context
2565
to count the number of matched elements, as it will otherwise emit a warning
2566
for each usage in scalar context without any of the above restricting options.
2567
2568
=item *
2569
2570
C<< any >> - no error is raised, no matter if an item is found or not.
2571
2572
=item *
2573
2574
C<< type >> - force the return type of the query.
2575
2576
type => $mech->xpathResult('ORDERED_NODE_SNAPSHOT_TYPE'),
2577
2578
WWW::Mechanize::Firefox tries a best effort in giving you the appropriate
2579
result of your query, be it a DOM node or a string or a number. In the case
2580
you need to restrict the return type, you can pass this in.
2581
2582
The allowed strings are documented in the MDN. Interesting types are
2583
2584
ANY_TYPE (default, uses whatever things the query returns)
2585
STRING_TYPE
2586
NUMBER_TYPE
2587
ORDERED_NODE_SNAPSHOT_TYPE
2588
2589
=back
2590
2591
Returns the matched results.
2592
2593
You can pass in a list of queries as an array reference for the first parameter.
2594
The result will then be the list of all elements matching any of the queries.
2595
2596
This is a method that is not implemented in WWW::Mechanize.
2597
2598
In the long run, this should go into a general plugin for
2599
L.
2600
2601
=cut
2602
2603
sub xpath {
2604
0
0
1
my ($self,$query,%options) = @_;
2605
2606
0
my $single = $options{ single };
2607
0
my $first = $options{ one };
2608
0
my $maybe = $options{ maybe };
2609
0
my $any = $options{ any };
2610
0
0
my $return_first_element = ($single or $first or $maybe or $any );
2611
2612
# Construct some helper variables
2613
0
0
my $zero_allowed = not ($single or $first);
2614
0
0
my $two_allowed = not( $single or $maybe);
2615
2616
# Sanity check for the common error of
2617
# my $item = $mech->xpath("//foo");
2618
0
0
0
if (! exists $options{ all } and not ($return_first_element)) {
2619
0
0
0
$self->signal_condition(join "\n",
2620
"You asked for many elements but seem to only want a single item.",
2621
"Did you forget to pass the 'single' option with a true value?",
2622
"Pass 'all => 1' to suppress this message and receive the count of items.",
2623
) if defined wantarray and !wantarray;
2624
};
2625
2626
# How can we return here a set of strings
2627
# if we don't return an array in .result?!
2628
my @res= map {
2629
!defined $_->{resultType}
2630
? ()
2631
: $_->{ resultType } == $self->{ XpathResultTypes }->{ORDERED_NODE_SNAPSHOT_TYPE }
2632
|| $_->{ resultType } == $self->{ XpathResultTypes }->{UNORDERED_NODE_SNAPSHOT_TYPE }
2633
|| $_->{ resultType } == $self->{ XpathResultTypes }->{ORDERED_NODE_ITERATOR_TYPE }
2634
|| $_->{ resultType } == $self->{ XpathResultTypes }->{UNORDERED_NODE_ITERATOR_TYPE }
2635
0
? @{ $_->{result} }
2636
: $_->{ result }
2637
0
0
0
} $self->xpathEx(
0
2638
$query,
2639
#type => $self->{XpathResultTypes}->{ORDERED_NODE_SNAPSHOT_TYPE},
2640
type => $self->{XpathResultTypes}->{ANY_TYPE},
2641
0
return_first => $return_first_element,
2642
%options
2643
);
2644
2645
0
0
0
if (! $zero_allowed and @res == 0) {
2646
0
0
$options{ user_info } ||= $query;
2647
0
$self->signal_condition( "No elements found for $options{ user_info }" );
2648
};
2649
2650
0
0
0
if (! $two_allowed and @res > 1) {
2651
0
0
$options{ user_info } ||= $query;
2652
0
$self->highlight_node(@res);
2653
0
$self->signal_condition( (scalar @res) . " elements found for $options{ user_info }" );
2654
};
2655
2656
0
0
$return_first_element ? $res[0] : @res
2657
};
2658
2659
sub _initXpathResultTypes {
2660
0
0
my( $self )= @_;
2661
$self->{XpathResultTypes} ||= {
2662
0
0
ANY_TYPE => $self->repl->constant('XPathResult.ANY_TYPE'),
2663
NUMBER_TYPE => $self->repl->constant('XPathResult.NUMBER_TYPE'),
2664
STRING_TYPE => $self->repl->constant('XPathResult.STRING_TYPE'),
2665
BOOLEAN_TYPE => $self->repl->constant('XPathResult.BOOLEAN_TYPE'),
2666
UNORDERED_NODE_ITERATOR_TYPE => $self->repl->constant('XPathResult.UNORDERED_NODE_ITERATOR_TYPE'),
2667
ORDERED_NODE_ITERATOR_TYPE => $self->repl->constant('XPathResult.ORDERED_NODE_ITERATOR_TYPE'),
2668
UNORDERED_NODE_SNAPSHOT_TYPE => $self->repl->constant('XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE'),
2669
ORDERED_NODE_SNAPSHOT_TYPE => $self->repl->constant('XPathResult.ORDERED_NODE_SNAPSHOT_TYPE'),
2670
ANY_UNORDERED_TYPE => $self->repl->constant('XPathResult.ANY_UNORDERED_NODE_TYPE'),
2671
FIRST_ORDERED_NODE_TYPE => $self->repl->constant('XPathResult.FIRST_ORDERED_NODE_TYPE'),
2672
};
2673
2674
0
$self->{XpathResultTypenames} = +{ reverse %{ $self->{XpathResultTypes} } };
0
2675
};
2676
2677
0
0
0
sub xpathResultType { $_[0]->{ XpathResultTypenames }->{ $_[1] } };
2678
0
0
0
sub xpathResult { $_[0]->{XpathResultTypes}{$_[1]}; }
2679
2680
=head2 C<< $mech->xpathEx( $query, %options ) >>
2681
2682
my @links = $mech->xpathEx('//a[id="clickme"]');
2683
2684
Runs an XPath query in Firefox against a document. Returns a list
2685
of found elements. Each element in the result has the following properties:
2686
2687
=over 4
2688
2689
=item *
2690
2691
C<< resultType >> - the type of the result. The numerical value of C<< $mech->xpathResult() >>.
2692
2693
=item *
2694
2695
C<< resultSize >> - the number of elements in this result. This is 1 for atomic results like
2696
strings or numbers, and the number of elements for nodesets.
2697
2698
=item *
2699
2700
C<< result >> - the best result available. This is the nodeset
2701
or the text or number, depending on the query.
2702
2703
=back
2704
2705
=cut
2706
2707
sub xpathEx {
2708
# Returns verbose information about how things matched
2709
0
0
1
my ($self, $query, %options) = @_;
2710
2711
0
0
0
if ('ARRAY' ne (ref $query||'')) {
2712
0
$query = [$query];
2713
};
2714
2715
0
0
if ($options{ node }) {
2716
0
0
$options{ document } ||= $options{ node }->{ownerDocument};
2717
#warn "Have node, searching below node";
2718
} else {
2719
0
0
$options{ document } ||= $self->document;
2720
#warn "Searching below given document";
2721
#$options{node} = $options{document};
2722
};
2723
2724
0
0
$options{type} ||= $self->{XpathResult}->{ANY_TYPE};
2725
2726
0
0
$options{ user_info } ||= join " or ", map {qq{'$_'}} @$query;
0
2727
2728
# Sanity check for the common error of
2729
# my $item = $mech->xpathEx("//foo");
2730
0
0
if (! wantarray) {
2731
0
$self->signal_condition(join "\n",
2732
"->xpathEx needs to be called in list context.",
2733
);
2734
};
2735
2736
0
0
if (not exists $options{ frames }) {
2737
0
$options{frames} = $self->{frames};
2738
};
2739
2740
0
my $query_xpath = $self->repl->declare(<<'JS');
2741
function(doc, q, ref, type) {
2742
var xpr = doc.evaluate(q, ref, null, type, null);
2743
var r = { resultType: xpr.resultType, resultSize: 0, result: null };
2744
switch(xpr.resultType) {
2745
case XPathResult.NUMBER_TYPE:
2746
r.result= r.numberValue = xpr.numberValue;
2747
r.resultSize= 1;
2748
break;
2749
case XPathResult.STRING_TYPE:
2750
r.result= r.stringValue = xpr.stringValue;
2751
r.resultSize= 1;
2752
break;
2753
case XPathResult.BOOLEAN_TYPE:
2754
r.result= r.booleanValue = xpr.booleanValue;
2755
r.resultSize= 1;
2756
break;
2757
case XPathResult.UNORDERED_NODE_ITERATOR_TYPE:
2758
case XPathResult.ORDERED_NODE_ITERATOR_TYPE:
2759
r.result= r.nodeSet = [];
2760
var n;
2761
while (n = xpr.iterateNext()) {
2762
r.nodeSet.push(n);
2763
r.resultSize++;
2764
}
2765
break;
2766
case XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE:
2767
case XPathResult.ORDERED_NODE_SNAPSHOT_TYPE:
2768
r.result= r.nodeSet = [];
2769
r.resultSize= xpr.snapshotLength;
2770
for (var i = 0 ; i < xpr.snapshotLength; i++ ) {
2771
r.nodeSet[i] = xpr.snapshotItem(i);
2772
}
2773
break;
2774
case XPathResult.ANY_UNORDERED_NODE_TYPE:
2775
case XPathResult.FIRST_ORDERED_NODE_TYPE:
2776
r.result= r.singleNodeValue = xpr.singleNodeValue;
2777
r.resultSize= 1;
2778
break;
2779
default:
2780
break;
2781
}
2782
return r;
2783
}
2784
JS
2785
2786
0
my @res;
2787
2788
DOCUMENTS: {
2789
0
my @documents = $options{ document };
0
2790
#warn "Invalid root document" unless $options{ document };
2791
2792
# recursively join the results of sub(i)frames if wanted
2793
# This should maybe go into the loop to expand every frame as we descend
2794
# into the available subframes
2795
2796
0
while (@documents) {
2797
0
my $doc = shift @documents;
2798
#warn "Invalid document" unless $doc;
2799
2800
0
0
my $n = $options{ node } || $doc;
2801
#warn ">Searching @$query in $doc->{title}";
2802
# Munge the multiple @$queries into one:
2803
0
my $q = join "|", @$query;
2804
#warn $q;
2805
0
my @found = $query_xpath->($doc, $q, $n, $options{type});
2806
0
push @res, @found;
2807
2808
# A small optimization to return if we already have enough elements
2809
# We can't do this on $return_first as there might be more elements
2810
0
0
0
if( @res and $options{ return_first } and grep { $_->{resultSize} } @res ) {
0
0
2811
0
@res= grep { $_->{resultSize} } @res;
0
2812
0
last DOCUMENTS;
2813
};
2814
2815
0
0
0
if ($options{ frames } and not $options{ node }) {
2816
#warn ">Expanding below " . $doc->{title};
2817
#local $nesting .= "--";
2818
0
my @d = $self->expand_frames( $options{ frames }, $doc );
2819
#warn "Found $_->{title}" for @d;
2820
0
push @documents, @d;
2821
};
2822
};
2823
};
2824
2825
@res
2826
0
}
2827
2828
=head2 C<< $mech->selector( $css_selector, %options ) >>
2829
2830
my @text = $mech->selector('p.content');
2831
2832
Returns all nodes matching the given CSS selector. If
2833
C<$css_selector> is an array reference, it returns
2834
all nodes matched by any of the CSS selectors in the array.
2835
2836
This takes the same options that C<< ->xpath >> does.
2837
2838
In the long run, this should go into a general plugin for
2839
L.
2840
2841
=cut
2842
2843
sub selector {
2844
0
0
1
my ($self,$query,%options) = @_;
2845
0
0
$options{ user_info } ||= "CSS selector '$query'";
2846
0
0
0
if ('ARRAY' ne (ref $query || '')) {
2847
0
$query = [$query];
2848
};
2849
0
0
my $root = $options{ node } ? './' : '';
2850
0
my @q = map { selector_to_xpath($_, root => $root) } @$query;
0
2851
0
$self->xpath(\@q, %options);
2852
};
2853
2854
=head2 C<< $mech->by_id( $id, %options ) >>
2855
2856
my @text = $mech->by_id('_foo:bar');
2857
2858
Returns all nodes matching the given ids. If
2859
C<$id> is an array reference, it returns
2860
all nodes matched by any of the ids in the array.
2861
2862
This method is equivalent to calling C<< ->xpath >> :
2863
2864
$self->xpath(qq{//*[\@id="$_"], %options)
2865
2866
It is convenient when your element ids get mistaken for
2867
CSS selectors.
2868
2869
=cut
2870
2871
sub by_id {
2872
0
0
1
my ($self,$query,%options) = @_;
2873
0
0
0
if ('ARRAY' ne (ref $query||'')) {
2874
0
$query = [$query];
2875
};
2876
$options{ user_info } ||= "id "
2877
0
0
. join(" or ", map {qq{'$_'}} @$query)
0
2878
. " found";
2879
0
$query = [map { qq{.//*[\@id="$_"]} } @$query];
0
2880
0
$self->xpath($query, %options)
2881
}
2882
2883
=head2 C<< $mech->click( $name [,$x ,$y] ) >>
2884
2885
$mech->click( 'go' );
2886
$mech->click({ xpath => '//button[@name="go"]' });
2887
2888
Has the effect of clicking a button (or other element) on the current form. The
2889
first argument is the C of the button to be clicked. The second and third
2890
arguments (optional) allow you to specify the (x,y) coordinates of the click.
2891
2892
If there is only one button on the form, C<< $mech->click() >> with
2893
no arguments simply clicks that one button.
2894
2895
If you pass in a hash reference instead of a name,
2896
the following keys are recognized:
2897
2898
=over 4
2899
2900
=item *
2901
2902
C - Find the element to click by the CSS selector
2903
2904
=item *
2905
2906
C - Find the element to click by the XPath query
2907
2908
=item *
2909
2910
C - Click on the passed DOM element
2911
2912
You can use this to click on arbitrary page elements. There is no convenient
2913
way to pass x/y co-ordinates with this method.
2914
2915
=item *
2916
2917
C - Click on the element with the given id
2918
2919
This is useful if your document ids contain characters that
2920
do look like CSS selectors. It is equivalent to
2921
2922
xpath => qq{//*[\@id="$id"]}
2923
2924
=item *
2925
2926
C - Synchronize the click (default is 1)
2927
2928
Synchronizing means that WWW::Mechanize::Firefox will wait until
2929
one of the events listed in C is fired. You want to switch
2930
it off when there will be no HTTP response or DOM event fired, for
2931
example for clicks that only modify the DOM.
2932
2933
You can pass in a scalar that is a false value to not wait for
2934
any kind of event.
2935
2936
Passing in an array reference will use the array elements as
2937
Javascript events to wait for.
2938
2939
Passing in any other true value will use the value of C<< ->events >>
2940
as the list of events to wait for.
2941
2942
=back
2943
2944
Returns a L object.
2945
2946
As a deviation from the WWW::Mechanize API, you can also pass a
2947
hash reference as the first parameter. In it, you can specify
2948
the parameters to search much like for the C calls.
2949
2950
Note: Currently, clicking on images with the C attribute
2951
does not trigger the move to the new URL. A workaround is to program
2952
the new URL into your script.
2953
2954
=cut
2955
2956
sub click {
2957
0
0
1
my ($self,$name,$x,$y) = @_;
2958
0
my %options;
2959
my @buttons;
2960
0
0
0
if (! defined $name) {
0
0
0
2961
0
croak("->click called with undef link");
2962
} elsif (ref $name and blessed($name) and $name->can('__click')) {
2963
0
$options{ dom } = $name;
2964
} elsif (ref $name eq 'HASH') { # options
2965
0
%options = %$name;
2966
} else {
2967
0
$options{ name } = $name;
2968
};
2969
0
0
if (exists $options{ name }) {
2970
0
0
$name = quotemeta($options{ name }|| '');
2971
$options{ xpath } = [
2972
0
sprintf( q{//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="button" and @name="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit" or @type="image") and @name="%s")]}, $name, $name),
2973
];
2974
0
0
if ($options{ name } eq '') {
2975
0
push @{ $options{ xpath }},
0
2976
q{//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" or translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input") and @type="button" or @type="submit" or @type="image"]},
2977
;
2978
};
2979
0
$options{ user_info } = "Button with name '$name'";
2980
};
2981
2982
0
0
if (! exists $options{ synchronize }) {
0
2983
0
$options{ synchronize } = $self->events;
2984
} elsif( ! ref $options{ synchronize }) {
2985
$options{ synchronize } = $options{ synchronize }
2986
0
0
? $self->events
2987
: [],
2988
};
2989
2990
0
0
if ($options{ dom }) {
2991
0
@buttons = $options{ dom };
2992
} else {
2993
0
@buttons = $self->_option_query(%options);
2994
};
2995
2996
$self->_sync_call(
2997
$options{ synchronize }, sub { # ,'abort'
2998
0
0
$buttons[0]->__click($x,$y);
2999
}
3000
0
);
3001
3002
0
0
if (defined wantarray) {
3003
0
return $self->response
3004
};
3005
}
3006
3007
=head2 C<< $mech->click_button( ... ) >>
3008
3009
$mech->click_button( name => 'go' );
3010
$mech->click_button( input => $mybutton );
3011
3012
Has the effect of clicking a button on the current form by specifying its
3013
name, value, or index. Its arguments are a list of key/value pairs. Only
3014
one of name, number, input or value must be specified in the keys.
3015
3016
=over 4
3017
3018
=item *
3019
3020
C - name of the button
3021
3022
=item *
3023
3024
C - value of the button
3025
3026
=item *
3027
3028
C - DOM node
3029
3030
=item *
3031
3032
C - id of the button
3033
3034
=item *
3035
3036
C - number of the button
3037
3038
=back
3039
3040
If you find yourself wanting to specify a button through its
3041
C or C, consider using C<< ->click >> instead.
3042
3043
=cut
3044
3045
sub click_button {
3046
0
0
1
my ($self,%options) = @_;
3047
0
my $node;
3048
my $xpath;
3049
0
my $user_message;
3050
0
0
if (exists $options{ input }) {
0
0
0
0
3051
0
$node = delete $options{ input };
3052
} elsif (exists $options{ name }) {
3053
0
my $v = delete $options{ name };
3054
0
$xpath = sprintf( '//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="button" and @name="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit") and @name="%s")]', $v, $v);
3055
0
$user_message = "Button name '$v' unknown";
3056
} elsif (exists $options{ value }) {
3057
0
my $v = delete $options{ value };
3058
0
$xpath = sprintf( '//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" and @value="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit") and @value="%s")]', $v, $v);
3059
0
$user_message = "Button value '$v' unknown";
3060
} elsif (exists $options{ id }) {
3061
0
my $v = delete $options{ id };
3062
0
$xpath = sprintf '//*[@id="%s"]', $v;
3063
0
$user_message = "Button name '$v' unknown";
3064
} elsif (exists $options{ number }) {
3065
0
my $v = delete $options{ number };
3066
0
$xpath = sprintf '//*[translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "input" and @type="submit")][%s]', $v;
3067
0
$user_message = "Button number '$v' out of range";
3068
};
3069
0
0
$node ||= $self->xpath( $xpath,
3070
node => $self->current_form,
3071
single => 1,
3072
user_message => $user_message,
3073
);
3074
0
0
if ($node) {
3075
0
$self->click({ dom => $node, %options });
3076
} else {
3077
3078
0
$self->signal_condition($user_message);
3079
};
3080
3081
}
3082
3083
=head1 FORM METHODS
3084
3085
=head2 C<< $mech->current_form() >>
3086
3087
print $mech->current_form->{name};
3088
3089
Returns the current form.
3090
3091
This method is incompatible with L.
3092
It returns the DOM C<<
3093
a L instance.
3094
3095
Note that WWW::Mechanize::Firefox has little way to know
3096
that the current form is not displayed in the browser
3097
anymore, so it often holds on to the last value. If
3098
you want to make sure that a fresh or no form is used,
3099
remove it:
3100
3101
$mech->clear_current_form;
3102
3103
The current form will be reset by WWW::Mechanize::Firefox
3104
on calls to C<< ->get() >> and C<< ->get_local() >>,
3105
and on calls to C<< ->submit() >> and C<< ->submit_with_fields >>.
3106
3107
=cut
3108
3109
sub current_form {
3110
$_[0]->{current_form}
3111
0
0
1
};
3112
sub clear_current_form {
3113
0
0
0
undef $_[0]->{current_form};
3114
};
3115
3116
=head2 C<< $mech->form_name( $name [, %options] ) >>
3117
3118
$mech->form_name( 'search' );
3119
3120
Selects the current form by its name. The options
3121
are identical to those accepted by the L<< /$mech->xpath >> method.
3122
3123
=cut
3124
3125
sub form_name {
3126
0
0
1
my ($self,$name,%options) = @_;
3127
0
$name = quote_xpath $name;
3128
0
_default_limiter( single => \%options );
3129
0
$self->{current_form} = $self->selector("form[name='$name']",
3130
user_info => "form name '$name'",
3131
%options
3132
);
3133
};
3134
3135
=head2 C<< $mech->form_id( $id [, %options] ) >>
3136
3137
$mech->form_id( 'login' );
3138
3139
Selects the current form by its C attribute.
3140
The options
3141
are identical to those accepted by the L<< /$mech->xpath >> method.
3142
3143
This is equivalent to calling
3144
3145
$mech->by_id($id,single => 1,%options)
3146
3147
=cut
3148
3149
sub form_id {
3150
0
0
1
my ($self,$name,%options) = @_;
3151
3152
0
_default_limiter( single => \%options );
3153
0
$self->{current_form} = $self->by_id($name,
3154
user_info => "form with id '$name'",
3155
%options
3156
);
3157
};
3158
3159
=head2 C<< $mech->form_number( $number [, %options] ) >>
3160
3161
$mech->form_number( 2 );
3162
3163
Selects the Ith form.
3164
The options
3165
are identical to those accepted by the L<< /$mech->xpath >> method.
3166
3167
=cut
3168
3169
sub form_number {
3170
0
0
1
my ($self,$number,%options) = @_;
3171
3172
0
_default_limiter( single => \%options );
3173
0
$self->{current_form} = $self->xpath("(//form)[$number]",
3174
user_info => "form number $number",
3175
%options
3176
);
3177
};
3178
3179
=head2 C<< $mech->form_with_fields( [$options], @fields ) >>
3180
3181
$mech->form_with_fields(
3182
'user', 'password'
3183
);
3184
3185
Find the form which has the listed fields.
3186
3187
If the first argument is a hash reference, it's taken
3188
as options to C<< ->xpath >>.
3189
3190
See also L<< /$mech->submit_form >>.
3191
3192
=cut
3193
3194
sub form_with_fields {
3195
0
0
1
my ($self,@fields) = @_;
3196
0
my $options = {};
3197
0
0
if (ref $fields[0] eq 'HASH') {
3198
0
$options = shift @fields;
3199
};
3200
0
my @clauses = map { $self->application->element_query([qw[input select textarea]], { 'name' => $_ })} @fields;
0
3201
3202
3203
0
my $q = "//form[" . join( " and ", @clauses)."]";
3204
#warn $q;
3205
0
_default_limiter( single => $options );
3206
0
$self->{current_form} = $self->xpath($q,
3207
user_info => "form with fields [@fields]",
3208
%$options
3209
);
3210
};
3211
3212
=head2 C<< $mech->forms( %options ) >>
3213
3214
my @forms = $mech->forms();
3215
3216
When called in a list context, returns a list
3217
of the forms found in the last fetched page.
3218
In a scalar context, returns a reference to
3219
an array with those forms.
3220
3221
The options
3222
are identical to those accepted by the L<< /$mech->selector >> method.
3223
3224
The returned elements are the DOM C<<
3225
3226
=cut
3227
3228
sub forms {
3229
0
0
1
my ($self, %options) = @_;
3230
0
my @res = $self->selector('form', %options);
3231
return wantarray ? @res
3232
0
0
: \@res
3233
};
3234
3235
=head2 C<< $mech->field( $selector, $value, [,\@pre_events [,\@post_events]] ) >>
3236
3237
$mech->field( user => 'joe' );
3238
$mech->field( not_empty => '', [], [] ); # bypass JS validation
3239
3240
Sets the field with the name given in C<$selector> to the given value.
3241
Returns the value.
3242
3243
The method understands very basic CSS selectors in the value for C<$selector>,
3244
like the L find_input() method.
3245
3246
A selector prefixed with '#' must match the id attribute of the input.
3247
A selector prefixed with '.' matches the class attribute. A selector
3248
prefixed with '^' or with no prefix matches the name attribute.
3249
3250
By passing the array reference C<@pre_events>, you can indicate which
3251
Javascript events you want to be triggered before setting the value.
3252
C<@post_events> contains the events you want to be triggered
3253
after setting the value.
3254
3255
By default, the events set in the
3256
constructor for C and C
3257
are triggered.
3258
3259
=cut
3260
3261
sub field {
3262
0
0
1
my ($self,$name,$value,$pre,$post) = @_;
3263
0
$self->get_set_value(
3264
name => $name,
3265
value => $value,
3266
pre => $pre,
3267
post => $post,
3268
node => $self->current_form,
3269
);
3270
}
3271
3272
=head2 C<< $mech->value( $selector_or_element, [%options] ) >>
3273
3274
print $mech->value( 'user' );
3275
3276
Returns the value of the field given by C<$selector_or_name> or of the
3277
DOM element passed in.
3278
3279
The legacy form of
3280
3281
$mech->value( name => value );
3282
3283
is also still supported but will likely be deprecated
3284
in favour of the C<< ->field >> method.
3285
3286
For fields that can have multiple values, like a C field,
3287
the method is context sensitive and returns the first selected
3288
value in scalar context and all values in list context.
3289
3290
=cut
3291
3292
sub value {
3293
0
0
0
1
if (@_ == 3) {
3294
0
my ($self,$name,$value) = @_;
3295
0
return $self->field($name => $value);
3296
} else {
3297
0
my ($self,$name,%options) = @_;
3298
0
return $self->get_set_value(
3299
node => $self->current_form,
3300
%options,
3301
name => $name,
3302
);
3303
};
3304
};
3305
3306
=head2 C<< $mech->get_set_value( %options ) >>
3307
3308
Allows fine-grained access to getting/setting a value
3309
with a different API. Supported keys are:
3310
3311
pre
3312
post
3313
name
3314
node
3315
value
3316
3317
in addition to all keys that C<< $mech->xpath >> supports.
3318
3319
=cut
3320
3321
sub _field_by_name {
3322
0
0
my ($self,%options) = @_;
3323
0
my @fields;
3324
0
my $name = delete $options{ name };
3325
0
my $attr = 'name';
3326
0
0
if ($name =~ s/^\^//) { # if it starts with ^, it's supposed to be a name
0
0
3327
0
$attr = 'name'
3328
} elsif ($name =~ s/^#//) {
3329
0
$attr = 'id'
3330
} elsif ($name =~ s/^\.//) {
3331
0
$attr = 'class'
3332
};
3333
0
0
if (blessed $name) {
3334
0
@fields = $name;
3335
} else {
3336
0
_default_limiter( single => \%options );
3337
0
my $query = $self->application->element_query([qw[input select textarea]], { $attr => $name });
3338
#warn $query;
3339
0
@fields = $self->xpath($query,%options);
3340
};
3341
@fields
3342
0
}
3343
3344
sub get_set_value {
3345
0
0
1
my ($self,%options) = @_;
3346
0
my $set_value = exists $options{ value };
3347
0
my $value = delete $options{ value };
3348
0
0
my $pre = delete $options{pre} || $self->{pre_value};
3349
0
0
my $post = delete $options{post} || $self->{post_value};
3350
0
my $name = delete $options{ name };
3351
0
my @fields = $self->_field_by_name(
3352
name => $name,
3353
user_info => "input with name '$name'",
3354
%options );
3355
0
0
$pre = [$pre]
3356
if (! ref $pre);
3357
0
0
$post = [$post]
3358
if (! ref $post);
3359
3360
0
0
if ($fields[0]) {
3361
0
my $tag = $fields[0]->{tagName};
3362
0
0
if ($set_value) {
3363
0
for my $ev (@$pre) {
3364
0
$fields[0]->__event($ev);
3365
};
3366
3367
0
0
if ('select' eq $tag) {
3368
0
$self->select($fields[0], $value);
3369
} else {
3370
0
$fields[0]->{value} = $value;
3371
};
3372
3373
0
for my $ev (@$post) {
3374
0
$fields[0]->__event($ev);
3375
};
3376
};
3377
# What about 'checkbox'es/radioboxes?
3378
3379
# Don't bother to fetch the field's value if it's not wanted
3380
0
0
return unless defined wantarray;
3381
3382
# We could save some work here for the simple case of single-select
3383
# dropdowns by not enumerating all options
3384
0
0
if ('SELECT' eq uc $tag) {
3385
0
my @options = $self->xpath('.//option', node => $fields[0] );
3386
0
my @values = map { $_->{value} } grep { $_->{selected} } @options;
0
0
3387
0
0
if (wantarray) {
3388
return @values
3389
0
} else {
3390
0
return $values[0];
3391
}
3392
} else {
3393
return $fields[0]->{value}
3394
0
};
3395
} else {
3396
return
3397
0
}
3398
}
3399
3400
=head2 C<< $mech->select( $name, $value ) >>
3401
3402
=head2 C<< $mech->select( $name, \@values ) >>
3403
3404
Given the name of a C field, set its value to the value
3405
specified. If the field is not C<< >> and the
3406
C<$value> is an array, only the B value will be set.
3407
Passing C<$value> as a hash with
3408
an C key selects an item by number (e.g.
3409
C<< {n => 3} >> or C<< {n => [2,4]} >>).
3410
The numbering starts at 1. This applies to the current form.
3411
3412
If you have a field with C<< >> and you pass a single
3413
C<$value>, then C<$value> will be added to the list of fields selected,
3414
without clearing the others. However, if you pass an array reference,
3415
then all previously selected values will be cleared.
3416
3417
Returns true on successfully setting the value. On failure, returns
3418
false and calls C<< $self>warn() >> with an error message.
3419
3420
=cut
3421
3422
sub select {
3423
0
0
1
my ($self, $name, $value) = @_;
3424
0
my ($field) = $self->_field_by_name(
3425
node => $self->current_form,
3426
name => $name,
3427
#%options,
3428
);
3429
3430
0
0
if (! $field) {
3431
return
3432
0
};
3433
3434
0
my @options = $self->xpath( './/option', node => $field);
3435
0
my @by_index;
3436
my @by_value;
3437
0
my $single = $field->{type} eq "select-one";
3438
0
my $deselect;
3439
3440
0
0
0
if ('HASH' eq ref $value||'') {
0
0
3441
0
for (keys %$value) {
3442
0
0
$self->warn(qq{Unknown select value parameter "$_"})
3443
unless $_ eq 'n';
3444
}
3445
3446
0
$deselect = ref $value->{n};
3447
0
0
@by_index = ref $value->{n} ? @{ $value->{n} } : $value->{n};
0
3448
} elsif ('ARRAY' eq ref $value||'') {
3449
# clear all preselected values
3450
0
$deselect = 1;
3451
0
@by_value = @{ $value };
0
3452
} else {
3453
0
@by_value = $value;
3454
};
3455
3456
0
0
if ($deselect) {
3457
0
for my $o (@options) {
3458
0
$o->{selected} = 0;
3459
}
3460
};
3461
3462
0
0
if ($single) {
3463
# Only use the first element for single-element boxes
3464
0
0
$#by_index = 0+@by_index ? 0 : -1;
3465
0
0
$#by_value = 0+@by_value ? 0 : -1;
3466
};
3467
3468
# Select the items, either by index or by value
3469
0
for my $idx (@by_index) {
3470
0
$options[$idx-1]->{selected} = 1;
3471
};
3472
3473
0
for my $v (@by_value) {
3474
0
my $option = $self->xpath( sprintf( './/option[@value="%s"]', quote_xpath $v) , node => $field, single => 1 );
3475
0
$option->{selected} = 1;
3476
};
3477
3478
0
return @by_index + @by_value > 0;
3479
}
3480
3481
=head2 C<< $mech->tick( $name, $value [, $set ] ) >>
3482
3483
$mech->tick("confirmation_box", 'yes');
3484
3485
"Ticks" the first checkbox that has both the name and value associated with it
3486
on the current form. Dies if there is no named check box for that value.
3487
Passing in a false value as the third optional argument will cause the
3488
checkbox to be unticked.
3489
3490
(Un)ticking the checkbox is done by sending a click event to it if needed.
3491
If C<$value> is C, the first checkbox matching C<$name> will
3492
be (un)ticked.
3493
3494
If C<$name> is a reference to a hash, that hash will be used
3495
as the options to C<< ->find_link_dom >> to find the element.
3496
3497
=cut
3498
3499
sub tick {
3500
0
0
1
my ($self, $name, $value, $set) = @_;
3501
0
0
$set = 1
3502
if (@_ < 4);
3503
0
my %options;
3504
my @boxes;
3505
3506
0
0
0
if (! defined $name) {
0
0
0
3507
0
croak("->tick called with undef name");
3508
} elsif (ref $name and blessed($name) and $name->can('__click')) {
3509
0
$options{ dom } = $name;
3510
} elsif (ref $name eq 'HASH') { # options
3511
0
%options = %$name;
3512
} else {
3513
0
$options{ name } = $name;
3514
};
3515
3516
0
0
if (exists $options{ name }) {
3517
0
my $attr = 'name';
3518
0
0
if ($name =~ s/^\^//) { # if it starts with ^, it's supposed to be a name
0
0
3519
0
$attr = 'name'
3520
} elsif ($name =~ s/^#//) {
3521
0
$attr = 'id'
3522
} elsif ($name =~ s/^\.//) {
3523
0
$attr = 'class'
3524
};
3525
0
$name = quotemeta($name);
3526
0
0
$value = quotemeta($value) if $value;
3527
3528
0
_default_limiter( one => \%options );
3529
$options{ xpath } = [
3530
0
0
defined $value
3531
? sprintf( q{//input[@type="checkbox" and @%s="%s" and @value="%s"]}, $attr, $name, $value)
3532
: sprintf( q{//input[@type="checkbox" and @%s="%s"]}, $attr, $name)
3533
];
3534
0
0
$options{ user_info } = defined $value
3535
? "Checkbox with name '$name' and value '$value'"
3536
: "Checkbox with name '$name'";
3537
};
3538
3539
0
0
if ($options{ dom }) {
3540
0
@boxes = $options{ dom };
3541
} else {
3542
0
@boxes = $self->_option_query(%options);
3543
};
3544
3545
0
my $target = $boxes[0];
3546
0
my $is_set = $self->application->bool_ff_to_perl( $target->{checked} );
3547
0
0
0
if ($set xor $is_set) {
3548
0
0
if ($set) {
3549
0
$target->{checked}= 'checked';
3550
} else {
3551
0
$target->{checked} = 0;
3552
};
3553
};
3554
};
3555
3556
=head2 C<< $mech->untick( $name, $value ) >>
3557
3558
$mech->untick('spam_confirm','yes',undef)
3559
3560
Causes the checkbox to be unticked. Shorthand for
3561
3562
$mech->tick($name,$value,undef)
3563
3564
=cut
3565
3566
sub untick {
3567
0
0
1
my ($self, $name, $value) = @_;
3568
0
$self->tick( $name, $value, undef );
3569
};
3570
3571
=head2 C<< $mech->submit( $form ) >>
3572
3573
$mech->submit;
3574
3575
Submits the form. Note that this does B fire the C
3576
event and thus also does not fire eventual Javascript handlers.
3577
Maybe you want to use C<< $mech->click >> instead.
3578
3579
The default is to submit the current form as returned
3580
by C<< $mech->current_form >>.
3581
3582
=cut
3583
3584
sub submit {
3585
0
0
1
my ($self,$dom_form) = @_;
3586
0
0
$dom_form ||= $self->current_form;
3587
0
0
if ($dom_form) {
3588
0
$dom_form->submit(); # why don't we ->synchronize here??
3589
0
$self->signal_http_status;
3590
3591
0
$self->clear_current_form;
3592
0
1;
3593
} else {
3594
0
croak "I don't know which form to submit, sorry.";
3595
}
3596
};
3597
3598
=head2 C<< $mech->submit_form( %options ) >>
3599
3600
$mech->submit_form(
3601
with_fields => {
3602
user => 'me',
3603
pass => 'secret',
3604
}
3605
);
3606
3607
This method lets you select a form from the previously fetched page,
3608
fill in its fields, and submit it. It combines the form_number/form_name,
3609
set_fields and click methods into one higher level call. Its arguments are
3610
a list of key/value pairs, all of which are optional.
3611
3612
=over 4
3613
3614
=item *
3615
3616
C<< form => $mech->current_form() >>
3617
3618
Specifies the form to be filled and submitted. Defaults to the current form.
3619
3620
=item *
3621
3622
C<< fields => \%fields >>
3623
3624
Specifies the fields to be filled in the current form
3625
3626
=item *
3627
3628
C<< with_fields => \%fields >>
3629
3630
Probably all you need for the common case. It combines a smart form selector
3631
and data setting in one operation. It selects the first form that contains
3632
all fields mentioned in \%fields. This is nice because you don't need to
3633
know the name or number of the form to do this.
3634
3635
(calls L<< /$mech->form_with_fields() >> and L<< /$mech->set_fields() >>).
3636
3637
If you choose this, the form_number, form_name, form_id and fields options
3638
will be ignored.
3639
3640
=back
3641
3642
=cut
3643
3644
sub submit_form {
3645
0
0
1
my ($self,%options) = @_;
3646
3647
0
my $form = delete $options{ form };
3648
0
my $fields;
3649
0
0
if (! $form) {
3650
0
0
if ($fields = delete $options{ with_fields }) {
3651
0
my @names = keys %$fields;
3652
0
$form = $self->form_with_fields( \%options, @names );
3653
0
0
if (! $form) {
3654
0
$self->signal_condition("Couldn't find a matching form for @names.");
3655
return
3656
0
};
3657
} else {
3658
0
0
$fields = delete $options{ fields } || {};
3659
0
$form = $self->current_form;
3660
};
3661
};
3662
3663
0
0
if (! $form) {
3664
0
$self->signal_condition("No form found to submit.");
3665
return
3666
0
};
3667
0
$self->do_set_fields( form => $form, fields => $fields );
3668
0
$self->submit($form);
3669
}
3670
3671
=head2 C<< $mech->set_fields( $name => $value, ... ) >>
3672
3673
$mech->set_fields(
3674
user => 'me',
3675
pass => 'secret',
3676
);
3677
3678
This method sets multiple fields of the current form. It takes a list of
3679
field name and value pairs. If there is more than one field with the same
3680
name, the first one found is set. If you want to select which of the
3681
duplicate field to set, use a value which is an anonymous array which
3682
has the field value and its number as the 2 elements.
3683
3684
=cut
3685
3686
sub set_fields {
3687
0
0
1
my ($self, %fields) = @_;
3688
0
my $f = $self->current_form;
3689
0
0
if (! $f) {
3690
0
croak "Can't set fields: No current form set.";
3691
};
3692
0
$self->do_set_fields(form => $f, fields => \%fields);
3693
};
3694
3695
sub do_set_fields {
3696
0
0
0
my ($self, %options) = @_;
3697
0
my $form = delete $options{ form };
3698
0
my $fields = delete $options{ fields };
3699
3700
0
while (my($n,$v) = each %$fields) {
3701
0
0
if (ref $v) {
3702
0
($v,my $num) = @$v;
3703
0
0
warn "Index larger than 1 not supported, ignoring"
3704
unless $num == 1;
3705
};
3706
3707
0
$self->get_set_value( node => $form, name => $n, value => $v, %options );
3708
}
3709
};
3710
3711
=head2 C<< $mech->set_visible( @values ) >>
3712
3713
$mech->set_visible( $username, $password );
3714
3715
This method sets fields of the current form without having to know their
3716
names. So if you have a login screen that wants a username and password,
3717
you do not have to fetch the form and inspect the source (or use the
3718
C utility, installed with L) to see what
3719
the field names are; you can just say
3720
3721
$mech->set_visible( $username, $password );
3722
3723
and the first and second fields will be set accordingly. The method is
3724
called set_visible because it acts only on visible fields;
3725
hidden form inputs are not considered. It also respects
3726
the respective return value of C<< ->is_visible() >> for each
3727
field, so hiding of fields through CSS affects this too.
3728
3729
The specifiers that are possible in L are not yet supported.
3730
3731
=cut
3732
3733
sub set_visible {
3734
0
0
1
my ($self,@values) = @_;
3735
0
my $form = $self->current_form;
3736
0
my @form;
3737
0
0
if ($form) { @form = (node => $form) };
0
3738
0
my @visible_fields = $self->xpath( q{//input[not(@type) or }
3739
. q{(@type!= "hidden" and }
3740
. q{ @type!= "button" and }
3741
. q{ @type!= "submit" and }
3742
. q{ @type!= "image")]},
3743
@form
3744
);
3745
3746
0
@visible_fields = grep { $self->is_visible( $_ ) } @visible_fields;
0
3747
3748
0
0
if (@values > @visible_fields) {
3749
0
$self->signal_condition( "Not enough fields on page" );
3750
} else {
3751
0
for my $idx (0..$#values) {
3752
0
$self->field( $visible_fields[ $idx ] => $values[ $idx ]);
3753
};
3754
}
3755
}
3756
3757
=head2 C<< $mech->is_visible( $element ) >>
3758
3759
=head2 C<< $mech->is_visible( %options ) >>
3760
3761
if ($mech->is_visible( selector => '#login' )) {
3762
print "You can log in now.";
3763
};
3764
3765
Returns true if the element is visible, that is, it is
3766
a member of the DOM and neither it nor its ancestors have
3767
a CSS C attribute of C or
3768
a C attribute of C.
3769
3770
You can either pass in a DOM element or a set of key/value
3771
pairs to search the document for the element you want.
3772
3773
=over 4
3774
3775
=item *
3776
3777
C - the XPath query
3778
3779
=item *
3780
3781
C - the CSS selector
3782
3783
=item *
3784
3785
C - a DOM node
3786
3787
=back
3788
3789
The remaining options are passed through to either the
3790
L<< /$mech->xpath|xpath >> or L<< /$mech->selector|selector >> method.
3791
3792
=cut
3793
3794
sub is_visible {
3795
0
0
1
my ($self,%options);
3796
0
0
if (2 == @_) {
3797
0
($self,$options{dom}) = @_;
3798
} else {
3799
0
($self,%options) = @_;
3800
};
3801
0
_default_limiter( 'maybe', \%options );
3802
0
0
if (! $options{dom}) {
3803
0
$options{dom} = $self->_option_query(%options);
3804
};
3805
# No element means not visible
3806
return
3807
0
0
unless $options{ dom };
3808
0
0
$options{ window } ||= $self->tab->{linkedBrowser}->{contentWindow};
3809
3810
0
my $_is_visible = $self->repl->declare(<<'JS');
3811
function (obj,window)
3812
{
3813
while (obj) {
3814
// No object
3815
if (!obj) return false;
3816
3817
try {
3818
if( obj["parentNode"] ) 1;
3819
} catch (e) {
3820
// Dead object
3821
return false
3822
};
3823
// Descends from document, so we're done
3824
if (obj.parentNode === obj.ownerDocument) {
3825
return true;
3826
};
3827
// Not in the DOM
3828
if (!obj.parentNode) {
3829
return false;
3830
};
3831
// Direct style check
3832
if (obj.style) {
3833
if (obj.style.display == 'none') return false;
3834
if (obj.style.visibility == 'hidden') return false;
3835
};
3836
3837
if (window.getComputedStyle) {
3838
var style = window.getComputedStyle(obj, null);
3839
if (style.display == 'none') {
3840
return false; }
3841
if (style.visibility == 'hidden') {
3842
return false;
3843
};
3844
};
3845
obj = obj.parentNode;
3846
};
3847
// The object does not live in the DOM at all
3848
return false
3849
}
3850
JS
3851
0
!!$_is_visible->($options{dom}, $options{window});
3852
};
3853
3854
=head2 C<< $mech->wait_until_invisible( $element ) >>
3855
3856
=head2 C<< $mech->wait_until_invisible( %options ) >>
3857
3858
$mech->wait_until_invisible( $please_wait );
3859
3860
Waits until an element is not visible anymore.
3861
3862
Takes the same options as L<< $mech->is_visible/->is_visible >>.
3863
3864
In addition, the following options are accepted:
3865
3866
=over 4
3867
3868
=item *
3869
3870
C - the timeout after which the function will C. To catch
3871
the condition and handle it in your calling program, use an L block.
3872
A timeout of C<0> means to never time out.
3873
3874
=item *
3875
3876
C - the interval in seconds used to L. Subsecond
3877
intervals are possible.
3878
3879
=back
3880
3881
Note that when passing in a selector, that selector is requeried
3882
on every poll instance. So the following query will work as expected:
3883
3884
xpath => '//*[contains(text(),"stand by")]'
3885
3886
This also means that if your selector query relies on finding
3887
a changing text, you need to pass the node explicitly instead of
3888
passing the selector.
3889
3890
=cut
3891
3892
sub wait_until_invisible {
3893
0
0
1
my ($self,%options);
3894
0
0
if (2 == @_) {
3895
0
($self,$options{dom}) = @_;
3896
} else {
3897
0
($self,%options) = @_;
3898
};
3899
0
0
my $sleep = delete $options{ sleep } || 0.3;
3900
0
0
my $timeout = delete $options{ timeout } || 0;
3901
3902
0
_default_limiter( 'maybe', \%options );
3903
3904
3905
0
my $timeout_after;
3906
0
0
if ($timeout) {
3907
0
$timeout_after = time + $timeout;
3908
};
3909
0
my $v;
3910
my $node;
3911
0
0
do {
0
3912
0
$node = $options{ dom };
3913
0
0
if (! $node) {
3914
0
$node = $self->_option_query(%options);
3915
};
3916
return
3917
0
0
unless $node;
3918
0
sleep $sleep;
3919
} while ( $v = $self->is_visible($node)
3920
and (!$timeout_after or time < $timeout_after ));
3921
0
0
0
if ($node and time >= $timeout_after) {
3922
0
croak "Timeout of $timeout seconds reached while waiting for element to become invisible";
3923
};
3924
};
3925
3926
# Internal method to run either an XPath, CSS or id query against the DOM
3927
# Returns the element(s) found
3928
my %rename = (
3929
xpath => 'xpath',
3930
selector => 'selector',
3931
id => 'by_id',
3932
by_id => 'by_id',
3933
);
3934
3935
sub _option_query {
3936
0
0
my ($self,%options) = @_;
3937
0
my ($method,$q);
3938
0
for my $meth (keys %rename) {
3939
0
0
if (exists $options{ $meth }) {
3940
0
$q = delete $options{ $meth };
3941
0
0
$method = $rename{ $meth } || $meth;
3942
}
3943
};
3944
0
_default_limiter( 'one' => \%options );
3945
0
0
croak "Need either a name, a selector or an xpath key!"
3946
if not $method;
3947
0
return $self->$method( $q, %options );
3948
};
3949
3950
=head2 C<< $mech->clickables() >>
3951
3952
print "You could click on\n";
3953
for my $el ($mech->clickables) {
3954
print $el->{innerHTML}, "\n";
3955
};
3956
3957
Returns all clickable elements, that is, all elements
3958
with an C attribute.
3959
3960
=cut
3961
3962
sub clickables {
3963
0
0
1
my ($self, %options) = @_;
3964
0
$self->xpath('//*[@onclick]', %options);
3965
};
3966
3967
=head2 C<< $mech->expand_frames( $spec ) >>
3968
3969
my @frames = $mech->expand_frames();
3970
3971
Expands the frame selectors (or C<1> to match all frames)
3972
into their respective DOM document nodes according to the current
3973
document. All frames will be visited in breadth first order.
3974
3975
This is mostly an internal method.
3976
3977
=cut
3978
3979
sub expand_frames {
3980
0
0
1
my ($self, $spec, $document) = @_;
3981
0
0
$spec ||= $self->{frames};
3982
0
0
my @spec = ref $spec ? @$spec : $spec;
3983
0
0
$document ||= $self->document;
3984
3985
0
0
0
if (! ref $spec and $spec !~ /\D/ and $spec == 1) {
0
3986
# All frames
3987
0
@spec = qw( frame iframe );
3988
};
3989
3990
# Optimize the default case of only names in @spec
3991
0
my @res;
3992
0
0
if (! grep {ref} @spec) {
0
3993
0
@res = map { $_->{contentDocument} }
0
3994
$self->selector(
3995
\@spec,
3996
document => $document,
3997
frames => 0, # otherwise we'll recurse :)
3998
);
3999
} else {
4000
@res =
4001
map { #warn "Expanding $_";
4002
0
0
ref $_
0
4003
? $_
4004
# Just recurse into the above code path
4005
: $self->expand_frames( $_, $document );
4006
} @spec;
4007
}
4008
};
4009
4010
=head1 IMAGE METHODS
4011
4012
=head2 C<< $mech->content_as_png( [$tab, \%coordinates, \%target_size ] ) >>
4013
4014
my $png_data = $mech->content_as_png();
4015
4016
# Create scaled-down 480px wide preview
4017
my $png_data = $mech->content_as_png(undef, undef, { width => 480 });
4018
4019
Returns the given tab or the current page rendered as PNG image.
4020
4021
All parameters are optional.
4022
4023
=over 4
4024
4025
=item *
4026
4027
C<$tab> defaults to the current tab.
4028
4029
=item *
4030
4031
If the coordinates are given, that rectangle will be cut out.
4032
The coordinates should be a hash with the four usual entries,
4033
C,C,C,C.
4034
4035
=item *
4036
4037
The target size of the image can also be given. It defaults to
4038
the size of the image. The allowed parameters in the hash are
4039
4040
C, C - for specifying the scale, default is 1.0 in each direction.
4041
4042
C, C - for specifying the target size
4043
4044
If you want the resulting image to be 480 pixels wide, specify
4045
4046
{ width => 480 }
4047
4048
The height will then be calculated from the ratio of original width to
4049
original height.
4050
4051
=back
4052
4053
This method is specific to WWW::Mechanize::Firefox.
4054
4055
Currently, the data transfer between Firefox and Perl
4056
is done Base64-encoded. It would be beneficial to find what's
4057
necessary to make JSON handle binary data more gracefully.
4058
4059
=cut
4060
4061
sub content_as_png {
4062
0
0
1
my ($self, $tab, $rect, $target_rect) = @_;
4063
0
0
$tab ||= $self->tab;
4064
0
0
$rect ||= {};
4065
0
0
$target_rect ||= {};
4066
4067
# Mostly taken from
4068
# http://wiki.github.com/bard/mozrepl/interactor-screenshot-server
4069
# Except for the addition of a target image size
4070
0
my $screenshot = $self->repl->declare(<<'JS');
4071
function (tab,rect,target_rect) {
4072
var browser = tab.linkedBrowser;
4073
var browserWindow = Components.classes['@mozilla.org/appshell/window-mediator;1']
4074
.getService(Components.interfaces.nsIWindowMediator)
4075
.getMostRecentWindow('navigator:browser');
4076
var win = browser.contentWindow;
4077
var body = win.document.body;
4078
if(!body) {
4079
return;
4080
};
4081
var canvas = browserWindow
4082
.document
4083
.createElementNS('http://www.w3.org/1999/xhtml', 'canvas');
4084
var left = rect.left || 0;
4085
var top = rect.top || 0;
4086
var width = rect.width || body.clientWidth;
4087
var height = rect.height || body.clientHeight;
4088
4089
if( isNaN( target_rect.scalex * target_rect.scaley ) || target_rect.scalex * target_rect.scaley == 0) {
4090
// No scale was given
4091
// Was a fixed target width / height given?
4092
if( target_rect.width ) {
4093
target_rect.scalex = target_rect.width / width;
4094
};
4095
if( target_rect.height ) {
4096
target_rect.scaley = target_rect.height / height
4097
};
4098
4099
// If only one of scalex / scaley is given, force the other
4100
// to be the same, default to 1.0
4101
target_rect.scalex = target_rect.scalex || target_rect.scaley || (target_rect.width / width) || 1.0;
4102
target_rect.scaley = target_rect.scaley || target_rect.scalex || (target_rect.height / height) || 1.0;
4103
} else {
4104
//alert("scales fixed");
4105
};
4106
// Calculate the target width/height if missing:
4107
target_rect.height = target_rect.height || height * target_rect.scaley;
4108
target_rect.width = target_rect.width || width * target_rect.scalex;
4109
4110
canvas.width = target_rect.width;
4111
canvas.height = target_rect.height;
4112
var ctx = canvas.getContext('2d');
4113
ctx.clearRect(0, 0, target_rect.width, target_rect.height);
4114
ctx.save();
4115
ctx.scale(target_rect.scalex, target_rect.scaley);
4116
ctx.drawWindow(win, left, top, width, height, 'rgb(255,255,255)');
4117
ctx.restore();
4118
4119
//return atob(
4120
return canvas
4121
.toDataURL('image/png', '')
4122
.split(',')[1]
4123
// );
4124
}
4125
JS
4126
0
my $scr = $screenshot->($tab, $rect, $target_rect);
4127
0
0
return $scr ? decode_base64($scr) : undef
4128
};
4129
4130
=head2 C<< $mech->element_as_png( $element ) >>
4131
4132
my $shiny = $mech->selector('#shiny', single => 1);
4133
my $i_want_this = $mech->element_as_png($shiny);
4134
4135
Returns PNG image data for a single element
4136
4137
=cut
4138
4139
sub element_as_png {
4140
0
0
1
my ($self, $element) = @_;
4141
0
my $tab = $self->tab;
4142
4143
0
my $pos = $self->element_coordinates($element);
4144
0
return $self->content_as_png($tab, $pos);
4145
};
4146
4147
=head2 C<< $mech->element_coordinates( $element ) >>
4148
4149
my $shiny = $mech->selector('#shiny', single => 1);
4150
my ($pos) = $mech->element_coordinates($shiny);
4151
print $pos->{left},',', $pos->{top};
4152
4153
Returns the page-coordinates of the C<$element>
4154
in pixels as a hash with four entries, C, C, C and C.
4155
4156
This function might get moved into another module more geared
4157
towards rendering HTML.
4158
4159
=cut
4160
4161
sub element_coordinates {
4162
0
0
1
my ($self, $element) = @_;
4163
4164
# Mostly taken from
4165
# http://www.quirksmode.org/js/findpos.html
4166
0
my $findPos = $self->repl->declare(<<'JS');
4167
function (obj) {
4168
var res = {
4169
left: 0,
4170
top: 0,
4171
width: obj.scrollWidth,
4172
height: obj.scrollHeight
4173
};
4174
if (obj.offsetParent) {
4175
do {
4176
res.left += obj.offsetLeft;
4177
res.top += obj.offsetTop;
4178
} while (obj = obj.offsetParent);
4179
}
4180
return res;
4181
}
4182
JS
4183
0
$findPos->($element);
4184
};
4185
4186
1;
4187
4188
__END__