File Coverage

blib/lib/WWW/Mechanize/Firefox.pm
Criterion Covered Total %
statement 62 917 6.7
branch 3 406 0.7
condition 0 253 0.0
subroutine 20 131 15.2
pod 79 88 89.7
total 164 1795 9.1


line stmt bran cond sub pod time code
1             package WWW::Mechanize::Firefox;
2 80     80   4556055 use 5.006; #weaken
  80         981  
3 80     80   517 use strict;
  80         190  
  80         2454  
4 80     80   48150 use Time::HiRes qw(sleep); # hires sleep()
  80         115410  
  80         346  
5              
6 80     80   55401 use URI ();
  80         532048  
  80         2193  
7 80     80   560 use File::Basename qw(dirname);
  80         164  
  80         7437  
8 80     80   39927 use HTTP::Response ();
  80         1744599  
  80         2757  
9 80     80   40155 use HTML::Selector::XPath 'selector_to_xpath';
  80         213333  
  80         5677  
10 80     80   37059 use MIME::Base64 'decode_base64';
  80         49597  
  80         6561  
11 80     80   34951 use WWW::Mechanize::Link;
  80         32564  
  80         2528  
12 80     80   37458 use Firefox::Application;
  80         282  
  80         2666  
13 80     80   577 use MozRepl::RemoteObject ();
  80         182  
  80         1205  
14 80     80   404 use MozRepl::RemoteObject::Methods ();
  80         175  
  80         1153  
15 80     80   38315 use HTTP::Cookies::MozRepl ();
  80         269  
  80         1772  
16 80     80   38356 use HTTP::Request::Common ();
  80         295853  
  80         2174  
17 80     80   567 use Scalar::Util qw'blessed weaken';
  80         222  
  80         4983  
18 80     80   525 use Encode qw(encode decode);
  80         188  
  80         3875  
19 80     80   526 use Carp qw(carp croak );
  80         201  
  80         72513  
20              
21             our $VERSION = '0.80';
22             our @CARP_NOT = ('MozRepl::RemoteObject',
23             'MozRepl::AnyEvent',
24             'MozRepl::RemoteObject::Instance'
25             ); # we trust these blindly
26              
27             =head1 NAME
28              
29             WWW::Mechanize::Firefox - use Firefox as if it were WWW::Mechanize
30              
31             =head1 SYNOPSIS
32              
33             use WWW::Mechanize::Firefox;
34             my $mech = WWW::Mechanize::Firefox->new();
35             $mech->get('http://google.com');
36              
37             $mech->eval_in_page('alert("Hello Firefox")');
38             my $png = $mech->content_as_png();
39              
40             This module will let you automate Firefox through the
41             Mozrepl plugin. You need to have installed
42             that plugin in your Firefox.
43              
44             For more examples see L<WWW::Mechanize::Firefox::Examples>.
45              
46             =head1 IMPORTANT NOTICE
47              
48             The Mozrepl plugin that this module uses no longer works due to key technologies
49             it depends on being retired from the Mozilla platform in November 2017.
50              
51             According to the github repo L, the last known compatible version is Firefox 54.
52              
53             Therefore this module cannot be used on Firefox versions greater than 54.
54              
55             =head1 CONSTRUCTOR and CONFIGURATION
56              
57             =head2 C<< $mech->new( %args ) >>
58              
59             use WWW::Mechanize::Firefox;
60             my $mech = WWW::Mechanize::Firefox->new();
61              
62             Creates a new instance and connects it to Firefox.
63              
64             Note that Firefox must have the C<mozrepl>
65             extension installed and enabled.
66              
67             The following options are recognized:
68              
69             =over 4
70              
71             =item *
72              
73             C<tab> - regex for the title of the tab to reuse. If no matching tab is
74             found, the constructor dies.
75              
76             If you pass in the string C<current>, the currently
77             active tab will be used instead.
78              
79             If you pass in a L<MozRepl::RemoteObject::Instance> instance, this will be used
80             as the new tab. This is convenient if you have an existing tab
81             in Firefox as object already, for example created through
82             L<Firefox::Application>C<< ->addTab() >>.
83              
84             =item *
85              
86             C<create> - will create a new tab if no existing tab matching
87             the criteria given in C<tab> can be found.
88              
89             =item *
90              
91             C<activate> - make the tab the active tab
92              
93             =item *
94              
95             C<launch> - name of the program to launch if we can't connect to it on
96             the first try.
97              
98             =item *
99              
100             C<frames> - an array reference of ids of subframes to include when
101             searching for elements on a page.
102              
103             If you want to always search through all frames, just pass C<1>. This
104             is the default.
105              
106             To prevent searching through frames, pass
107              
108             frames => 0
109              
110             To whitelist frames to be searched, pass the list
111             of frame selectors:
112              
113             frames => ['#content_frame']
114              
115             =item *
116              
117             C<autodie> - whether web failures are converted to fatal Perl errors. See
118             the C<autodie> accessor. True by default to make error checking easier.
119              
120             To make errors non-fatal, pass
121              
122             autodie => 0
123              
124             in the constructor.
125              
126             =item *
127              
128             C<agent> - the name of the User Agent to use. This overrides
129             how Firefox identifies itself.
130              
131             =item *
132              
133             C<log> - array reference to log levels, passed through to L<MozRepl::RemoteObject>
134              
135             =item *
136              
137             C<bufsize> - L<Net::Telnet> buffer size, if the default of 1MB is not enough
138              
139             =item *
140              
141             C<events> - the set of default Javascript events to listen for while
142             waiting for a reply. In fact, WWW::Mechanize::Firefox will almost always
143             wait until a 'DOMContentLoaded' or 'load' event. 'pagehide' events
144             will tell it for what frames to wait.
145              
146             The default set is
147              
148             'DOMContentLoaded','load',
149             'pageshow',
150             'pagehide',
151             'error','abort','stop',
152              
153             =item *
154              
155             C<app> - a premade L<Firefox::Application>
156              
157             =item *
158              
159             C<repl> - a premade L<MozRepl::RemoteObject> instance or a connection string
160             suitable for initializing one
161              
162             =item *
163              
164             C<use_queue> - whether to use the command queueing of L<MozRepl::RemoteObject>.
165             Default is 1.
166              
167             =item *
168              
169             C<js_JSON> - whether to use native JSON encoder of Firefox
170              
171             js_JSON => 'native', # force using the native JSON encoder
172              
173             The default is to autodetect whether a native JSON encoder is available and
174             whether the transport is UTF-8 safe.
175              
176             =item *
177              
178             C - the events that are sent to an input field before its
179             value is changed. By default this is C<[focus]>.
180              
181             =item *
182              
183             C - the events that are sent to an input field after its
184             value is changed. By default this is C<[blur, change]>.
185              
186             =back
187              
188             =cut
189              
190             sub new {
191 61     61 1 117241 my ($class, %args) = @_;
192              
193 61 50       363 if (! ref $args{ app }) {
194 61         355 my @passthrough = qw(launch repl bufsize log use_queue js_JSON);
195 61 100       213 my %options = map { exists $args{ $_ } ? ($_ => delete $args{ $_ }) : () }
  366         977  
196             @passthrough;
197 61         577 $args{ app } = Firefox::Application->new(
198             %options
199             );
200             };
201              
202 0 0         if (my $tabname = delete $args{ tab }) {
203 0 0         if (! ref $tabname) {
    0          
204 0 0         if ($tabname eq 'current') {
205 0           $args{ tab } = $args{ app }->selectedTab();
206             } else {
207 0           croak "Don't know what to do with tab '$tabname'. Did you mean qr{$tabname}?";
208             };
209             } elsif ('MozRepl::RemoteObject::Instance' eq ref $tabname) {
210             # Nothing to do - we already got a tab passed in
211             # Just put it back in place
212 0           $args{ tab } = $tabname;
213             } else {
214 0           ($args{ tab }) = grep { $_->{title} =~ /$tabname/ }
215 0           $args{ app }->openTabs();
216 0 0         if (! $args{ tab }) {
217 0 0         if (! delete $args{ create }) {
218 0           croak "Couldn't find a tab matching /$tabname/";
219             } else {
220             # fall through into tab creation
221             };
222             } else {
223 0           $args{ tab } = $args{ tab }->{tab};
224             };
225             };
226             };
227 0 0         if (! $args{ tab }) {
228 0 0         my @autoclose = exists $args{ autoclose } ? (autoclose => $args{ autoclose }) : ();
229 0           $args{ tab } = $args{ app }->addTab( @autoclose );
230 0           my $body = $args{ tab }->MozRepl::RemoteObject::Methods::dive(qw[ linkedBrowser contentWindow document body ]);
231 0           $body->{innerHTML} = __PACKAGE__;
232             };
233              
234 0 0         if (delete $args{ autoclose }) {
235 0           $args{ app }->autoclose_tab($args{ tab });
236             };
237 0 0         if (! exists $args{ autodie }) { $args{ autodie } = 1 };
  0            
238              
239             $args{ events } ||= [
240 0   0       'DOMContentLoaded','load',
241             'pageshow', # Navigation from cache will use "pageshow"
242             #'pagehide',
243             'error','abort','stop',
244             ];
245 0   0       $args{ on_event } ||= undef;
246 0   0       $args{ pre_value } ||= ['focus'];
247 0   0       $args{ post_value } ||= ['change','blur'];
248 0 0         if( ! exists $args{ frames }) {
249 0   0       $args{ frames } ||= 1; # we default to searching frames
250             };
251              
252             die "No tab found"
253 0 0         unless $args{tab};
254              
255 0 0         if (delete $args{ activate }) {
256 0           $args{ app }->activateTab( $args{ tab });
257             };
258              
259 0   0       $args{ response } ||= undef;
260 0   0       $args{ current_form } ||= undef;
261              
262 0   0       $args{ event_log } ||= [];
263              
264 0           my $agent = delete $args{ agent };
265              
266 0           my $self= bless \%args, $class;
267              
268 0           $self->_initXpathResultTypes;
269              
270 0 0         if( defined $agent ) {
271 0           $self->agent( $agent );
272             };
273              
274 0           $self
275             };
276              
277             sub DESTROY {
278 0     0     my ($self) = @_;
279 0           local $@;
280 0 0         if (my $app = delete $self->{ app }) {
281 0           %$self = (); # wipe out all references we keep
282             # but keep $app alive until we can dispose of it
283             # as the last thing, now:
284 0           $app = undef;
285             };
286             }
287              
288             =head2 C<< $mech->agent( $product_id ); >>
289              
290             $mech->agent('wonderbot/JS 1.0');
291              
292             Set the product token that is used to identify the user agent on the network.
293             The agent value is sent as the "User-Agent" header in the requests. The default
294             is whatever Firefox uses.
295              
296             To reset the user agent to the Firefox default, pass an empty string:
297              
298             $mech->agent('');
299              
300             =cut
301              
302             sub agent {
303 0     0 1   my ($self,$name) = @_;
304 0 0         if( defined $name ) {
    0          
305 0           $self->add_header('User-Agent',$name);
306             } elsif( $name eq '' ) {
307 0           $self->delete_header('User-Agent');
308             };
309             };
310              
311             =head2 C<< $mech->autodie( [$state] ) >>
312              
313             $mech->autodie(0);
314              
315             Accessor to get/set whether warnings become fatal.
316              
317             =cut
318              
319 0 0   0 1   sub autodie { $_[0]->{autodie} = $_[1] if @_ == 2; $_[0]->{autodie} }
  0            
320              
321             =head2 C<< $mech->events() >>
322              
323             $mech->events( ['load'] );
324              
325             Sets or gets the set of Javascript events that WWW::Mechanize::Firefox
326             will wait for after requesting a new page. Returns an array reference.
327              
328             Changing the set of events will most likely make WWW::Mechanize::Firefox
329             stall while waiting for a response.
330              
331             This method is special to WWW::Mechanize::Firefox.
332              
333             =cut
334              
335 0 0   0 1   sub events { $_[0]->{events} = $_[1] if (@_ > 1); $_[0]->{events} };
  0            
336              
337             =head2 C<< $mech->on_event() >>
338              
339             $mech->on_event(1); # prints every page load event
340              
341             # or give it a callback
342             $mech->on_event(sub { warn "Page loaded with $ev->{name} event" });
343              
344             Gets/sets the notification handler for the Javascript event
345             that finished a page load. Set it to C<1> to output via C,
346             or a code reference to call it with the event.
347              
348             This method is special to WWW::Mechanize::Firefox.
349              
350             =cut
351              
352 0 0   0 1   sub on_event { $_[0]->{on_event} = $_[1] if (@_ > 1); $_[0]->{on_event} };
  0            
353              
354             =head2 C<< $mech->cookies() >>
355              
356             my $cookie_jar = $mech->cookies();
357              
358             Returns a L<HTTP::Cookies::MozRepl> object that was initialized
359             from the live Firefox instance.
360              
361             B C<< ->set_cookie >> is not yet implemented,
362             as is saving the cookie jar.
363              
364             =cut
365              
366             sub cookies {
367 0     0 1   return HTTP::Cookies::MozRepl->new(
368             repl => $_[0]->repl
369             )
370             }
371              
372             =head1 JAVASCRIPT METHODS
373              
374             =head2 C<< $mech->allow( %options ) >>
375              
376             Enables or disables browser features for the current tab.
377             The following options are recognized:
378              
379             =over 4
380              
381             =item *
382              
383             C - Whether to allow plugin execution.
384              
385             =item *
386              
387             C - Whether to allow Javascript execution.
388              
389             =item *
390              
391             C - Attribute stating if refresh based redirects can be allowed.
392              
393             =item *
394              
395             C, C - Attribute stating if it should allow subframes (framesets/iframes) or not.
396              
397             =item *
398              
399             C - Attribute stating whether or not images should be loaded.
400              
401             =back
402              
403             Options not listed remain unchanged.
404              
405             =head3 Disable Javascript
406              
407             $mech->allow( javascript => 0 );
408              
409             =cut
410              
411 80     80   728 use vars '%known_options';
  80         213  
  80         433887  
412             %known_options = (
413             'javascript' => 'allowJavascript',
414             'plugins' => 'allowPlugins',
415             'metaredirects' => 'allowMetaRedirects',
416             'subframes' => 'allowSubframes',
417             'frames' => 'allowSubframes',
418             'images' => 'allowImages',
419             );
420              
421             sub allow {
422 0     0 1   my ($self,%options) = @_;
423 0           my $shell = $self->docshell;
424 0           for my $opt (sort keys %options) {
425 0 0         if (my $opt_js = $known_options{ $opt }) {
426 0           $shell->{$opt_js} = $options{ $opt };
427             } else {
428 0           carp "Unknown option '$opt_js' (ignored)";
429             };
430             };
431             };
432              
433             =head2 C<< $mech->js_errors() >>
434              
435             print $_->{message}
436             for $mech->js_errors();
437              
438             An interface to the Javascript Error Console
439              
440             Returns the list of errors in the JEC
441              
442             Maybe this should be called C or
443             C instead.
444              
445             =cut
446              
447             sub js_console {
448 0     0 0   my ($self) = @_;
449 0           my $getConsoleService = $self->repl->declare(<<'JS');
450             function() {
451             return Components.classes["@mozilla.org/consoleservice;1"]
452             .getService(Components.interfaces.nsIConsoleService);
453             }
454             JS
455 0           $getConsoleService->()
456             }
457              
458             sub js_errors {
459 0     0 1   my ($self,$page) = @_;
460 0           my $console = $self->js_console;
461 0           my $getErrorMessages = $self->repl->declare(<<'JS', 'list');
462             function (consoleService) {
463             var out = {};
464             consoleService.getMessageArray(out, {});
465             return out.value || []
466             };
467             JS
468 0           $getErrorMessages->($console);
469             }
470              
471             =head2 C<< $mech->clear_js_errors() >>
472              
473             $mech->clear_js_errors();
474              
475             Clears all Javascript messages from the console
476              
477             =cut
478              
479             sub clear_js_errors {
480 0     0 1   my ($self,$page) = @_;
481 0           $self->js_console->reset;
482              
483             };
484              
485             =head2 C<< $mech->eval_in_page( $str [, $env [, $document]] ) >>
486              
487             =head2 C<< $mech->eval( $str [, $env [, $document]] ) >>
488              
489             my ($value, $type) = $mech->eval( '2+2' );
490              
491             Evaluates the given Javascript fragment in the
492             context of the web page.
493             Returns a pair of value and Javascript type.
494              
495             This allows access to variables and functions declared
496             "globally" on the web page.
497              
498             The returned result needs to be treated with
499             extreme care because
500             it might lead to Javascript execution in the context of
501             your application instead of the context of the webpage.
502             This should be evident for functions and complex data
503             structures like objects. When working with results from
504             untrusted sources, you can only safely use simple
505             types like C.
506              
507             If you want to modify the environment the code is run under,
508             pass in a hash reference as the second parameter. All keys
509             will be inserted into the C object as well as
510             C. Also, complex data structures are only
511             supported if they contain no objects.
512             If you need finer control, you'll have to
513             write the Javascript yourself.
514              
515             This method is special to WWW::Mechanize::Firefox.
516              
517             Also, using this method opens a potential B<security risk> as
518             the returned values can be objects and using these objects
519             can execute malicious code in the context of the Firefox application.
520              
521             =cut
522              
523             sub eval_in_page {
524 0     0 1   my ($self,$str,$env,$doc,$window) = @_;
525 0   0       $env ||= {};
526 0           my $js_env = {};
527 0   0       $doc ||= $self->document;
528              
529             # do a manual transfer of keys, to circumvent our stupid
530             # transformation routine:
531 0 0         if (keys %$env) {
532 0           $js_env = $self->repl->declare(<<'JS')->();
533             function () { return new Object }
534             JS
535 0           for my $k (keys %$env) {
536 0           $js_env->{$k} = $env->{$k};
537             };
538             };
539              
540 0           my $eval_in_sandbox = $self->repl->declare(<<'JS', 'list');
541             function (w,d,str,env,caller,line) {
542             var unsafeWin = w.wrappedJSObject;
543             var safeWin = XPCNativeWrapper(unsafeWin);
544             var sandbox = Components.utils.Sandbox(safeWin);
545             sandbox.window = safeWin;
546             sandbox.document = d;
547             // Transfer the environment
548             for (var e in env) {
549             sandbox[e] = env[e]
550             sandbox.window[e] = env[e]
551             }
552             sandbox.__proto__ = unsafeWin;
553              
554             var res = Components.utils.evalInSandbox(str, sandbox, "1.8",caller,line);
555             return [res,typeof(res)];
556             };
557             JS
558 0   0       $window ||= $self->tab->{linkedBrowser}->{contentWindow};
559             # Report errors from scope of caller
560             # This feels weirdly backwards here, but oh well:
561             #local @CARP_NOT = (ref $self->repl); # we trust this
562              
563 0           my ($caller,$line) = (caller)[1,2];
564              
565 0           $eval_in_sandbox->($window,$doc,$str,$js_env,$caller,$line);
566             };
567             *eval = \&eval_in_page;
568              
569             =head2 C<< $mech->unsafe_page_property_access( ELEMENT ) >>
570              
571             Allows you unsafe access to properties of the current page. Using
572             such properties is an incredibly bad idea.
573              
574             This is why the function C<die>s. If you really want to use
575             this function, edit the source code.
576              
577             =cut
578              
579             sub unsafe_page_property_access {
580 0     0 1   my ($mech,$element) = @_;
581 0           die;
582 0           my $window = $mech->tab->{linkedBrowser}->{contentWindow};
583 0           my $unsafe = $window->{wrappedJSObject};
584 0           $unsafe->{$element}
585             };
586              
587             =head1 UI METHODS
588              
589             See also L<Firefox::Application> for how to add more than one tab
590             and how to manipulate windows and tabs.
591              
592             =head2 C<< $mech->application() >>
593              
594             my $ff = $mech->application();
595              
596             Returns the L<Firefox::Application> object for manipulating
597             more parts of the Firefox UI and application.
598              
599             =cut
600              
601 0     0 1   sub application { $_[0]->{app} };
602              
603             =head2 C<< $mech->autoclose_tab >>
604              
605             $mech->autoclose_tab( 0 ); # keep tab open after program end
606              
607             Set whether to close the tab associated with the instance.
608              
609             =cut
610              
611             sub autoclose_tab {
612 0     0 1   my $self = shift;
613 0           $self->application->autoclose_tab($self->tab, @_);
614             };
615              
616             =head2 C<< $mech->tab() >>
617              
618             Gets the object that represents the Firefox tab used by WWW::Mechanize::Firefox.
619              
620             This method is special to WWW::Mechanize::Firefox.
621              
622             =cut
623              
624 0     0 1   sub tab { $_[0]->{tab} };
625              
626             =head2 C<< $mech->make_progress_listener( %callbacks ) >>
627              
628             my $eventlistener = $mech->make_progress_listener(
629             onStateChange => \&onStateChange,
630             );
631              
632             Creates an unconnected C<< nsIWebProgressListener >> interface
633             which calls the Perl subroutines you pass in.
634              
635             Returns a handle. Once the handle gets released, all callbacks will
636             get stopped. Also, all Perl callbacks will get deregistered from the
637             Javascript bridge, so make sure not to use the same callback
638             in different progress listeners at the same time.
639             The sender may still call your callbacks.
640              
641             =cut
642              
643             sub make_progress_listener {
644 0     0 1   my ($mech,%handlers) = @_;
645 0           my $NOTIFY_STATE = $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
646             + $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS')
647             ;
648 0           my ($obj) = $mech->repl->expr('new Object');
649 0           for my $key (keys %handlers) {
650 0           $obj->{$key} = $handlers{$key};
651             };
652             #warn "Listener created";
653              
654 0           my $mk_nsIWebProgressListener = $mech->repl->declare(<<'JS');
655             function (myListener) {
656             var callbacks = ["onStateChange",
657             "onLocationChange",
658             "onProgressChange",
659             "onStatusChange",
660             "onSecurityChange"
661             // ,"onProgressChange64"
662             // ,"onRefreshAttempted"
663             ];
664             for (var h in callbacks) {
665             var e = callbacks[h];
666             if (! myListener[e]) {
667             myListener[e] = function(){}
668             } else {
669             // alert("Setting callback for " + e);
670             };
671             };
672             myListener.QueryInterface = function(aIID) {
673             if (aIID.equals(Components.interfaces.nsIWebProgressListener) ||
674             // aIID.equals(Components.interfaces.nsIWebProgressListener2) ||
675             aIID.equals(Components.interfaces.nsISupportsWeakReference) ||
676             aIID.equals(Components.interfaces.nsISupports))
677             return this;
678             throw Components.results.NS_NOINTERFACE;
679             };
680             return myListener
681             }
682             JS
683              
684             # Declare it here so we don't close over $lsn!
685             my $release = sub {
686 0 0   0     $_[0]->bridge->remove_callback(values %handlers)
687             if $_[0]->bridge;
688 0           };
689 0           my $lsn = $mk_nsIWebProgressListener->($obj);
690 0           $lsn->__on_destroy($release);
691 0           $lsn
692             };
693              
694              
695             =head2 C<< $mech->progress_listener( $source, %callbacks ) >>
696              
697             my $eventlistener = progress_listener(
698             $browser,
699             onLocationChange => \&onLocationChange,
700             );
701              
702             Sets up the callbacks for the C<< nsIWebProgressListener >> interface
703             to be the Perl subroutines you pass in.
704              
705             C< $source > needs to support C<.addProgressListener> and C<.removeProgressListener>.
706              
707             Returns a handle. Once the handle gets released, all callbacks will
708             get stopped. Also, all Perl callbacks will get deregistered from the
709             Javascript bridge, so make sure not to use the same callback
710             in different progress listeners at the same time.
711              
712             =cut
713              
714             sub progress_listener {
715 0     0 1   my ($self,$source,%handlers) = @_;
716              
717 0           my $lsn = $self->make_progress_listener(%handlers);
718 0           $lsn->{source} = $source;
719              
720 0           $lsn->__release_action('if(self.source)try{self.source.removeProgressListener(self)}catch(e){}');
721 0           my $NOTIFY_STATE = $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
722             + $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_LOCATION')
723             + $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS');
724 0           $source->addProgressListener($lsn,$NOTIFY_STATE);
725 0           $lsn
726             };
727              
728             =head2 C<< $mech->repl() >>
729              
730             my ($value,$type) = $mech->repl->expr('2+2');
731              
732             Gets the L<MozRepl::RemoteObject> instance that is used.
733              
734             This method is special to WWW::Mechanize::Firefox.
735              
736             =cut
737              
738 0     0 1   sub repl { $_[0]->application->repl };
739              
740             =head2 C<< $mech->highlight_node( @nodes ) >>
741              
742             my @links = $mech->selector('a');
743             $mech->highlight_node(@links);
744              
745             Convenience method that marks all nodes in the arguments
746             with
747              
748             background: red;
749             border: solid black 1px;
750             display: block; /* if the element was display: none before */
751              
752             This is convenient if you need visual verification that you've
753             got the right nodes.
754              
755             There currently is no way to restore the nodes to their original
756             visual state except reloading the page.
757              
758             =cut
759              
760             sub highlight_node {
761 0     0 1   my ($self,@nodes) = @_;
762 0           for (@nodes) {
763 0           my $style = $_->{style};
764             $style->{display} = 'block'
765 0 0         if $style->{display} eq 'none';
766 0           $style->{background} = 'red';
767 0           $style->{border} = 'solid black 1px;';
768             };
769             };
770              
771             =head1 NAVIGATION METHODS
772              
773             =head2 C<< $mech->get( $url, %options ) >>
774              
775             $mech->get( $url, ':content_file' => $tempfile );
776              
777             Retrieves the URL C into the tab.
778              
779             It returns a faked L<HTTP::Response> object for interface compatibility
780             with L<WWW::Mechanize>.
781              
782             Recognized options:
783              
784             =over 4
785              
786             =item *
787              
788             C<< :content_file >> - filename to store the data in
789              
790             =item *
791              
792             C<< no_cache >> - if true, bypass the browser cache
793              
794             =item *
795              
796             C<< synchronize >> - wait until all elements have loaded
797              
798             The default is to wait until all elements have loaded. You can switch
799             this off by passing
800              
801             synchronize => 0
802              
803             for example if you want to manually poll for an element that appears fairly
804             early during the load of a complex page.
805              
806             =back
807              
808             =cut
809              
810             sub get {
811 0     0 1   my ($self,$url, %options) = @_;
812 0           my $b = $self->tab->{linkedBrowser};
813 0           $self->clear_current_form;
814              
815 0           my $flags = 0;
816 0 0         if ($options{ no_cache }) {
817 0           $flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
818             };
819 0 0         if (! exists $options{ synchronize }) {
820 0           $options{ synchronize } = $self->events;
821             };
822 0 0         if( !ref $options{ synchronize }) {
823             $options{ synchronize } = $options{ synchronize }
824 0 0         ? $self->events
825             : []
826             };
827              
828             $self->_sync_call( $options{ synchronize }, sub {
829 0 0   0     if (my $target = delete $options{":content_file"}) {
830 0           $self->save_url($url => ''.$target, %options);
831             } else {
832 0           $b->loadURIWithFlags(''.$url,$flags);
833             };
834 0           });
835             };
836              
837             =head2 C<< $mech->get_local( $filename , %options ) >>
838              
839             $mech->get_local('test.html');
840              
841             Shorthand method to construct the appropriate
842             C<< file:// >> URI and load it into Firefox. Relative
843             paths will be interpreted as relative to C<$0>.
844              
845             This method accepts the same options as C<< ->get() >>.
846              
847             This method is special to WWW::Mechanize::Firefox but could
848             also exist in WWW::Mechanize through a plugin.
849              
850             Options:
851              
852             =over 4
853              
854             =item *
855              
856             B<basedir> - a reference directory to use instead of C< dirname($0) >
857              
858             =back
859              
860             =cut
861              
862             sub get_local {
863 0     0 1   my ($self, $htmlfile, %options) = @_;
864 0           require Cwd;
865 0           require File::Spec;
866              
867 0           my $fn = $htmlfile;
868 0 0         if( ! File::Spec->file_name_is_absolute( $fn )) {
869 0   0       $options{ basedir } ||= dirname($0);
870             $fn = File::Spec->rel2abs(
871 0           File::Spec->catfile($options{basedir},$htmlfile),
872             Cwd::getcwd(),
873             );
874             };
875 0           $fn =~ s!\\!/!g; # fakey "make file:// URL"
876              
877 0           $self->get("file://$fn", %options);
878             }
879              
880             =head2 C<< $mech->post( $url, %options ) >>
881              
882             $mech->post( 'http://example.com',
883             params => { param => "Hello World" },
884             headers => {
885             "Content-Type" => 'application/x-www-form-urlencoded',
886             },
887             charset => 'utf-8',
888             );
889              
890             Sends a POST request to C<$url>.
891              
892             A C<Content-Length> header will be automatically calculated if
893             it is not given.
894              
895             The following options are recognized:
896              
897             =over 4
898              
899             =item *
900              
901             C<headers> - a hash of HTTP headers to send. If not given,
902             the content type will be generated automatically.
903              
904             =item *
905              
906             C<data> - the raw data to send, if you've encoded it already.
907              
908             =back
909              
910             =cut
911              
912             sub post {
913 0     0 1   my ($self, $url, %options) = @_;
914 0           my $b = $self->tab->{linkedBrowser};
915 0           $self->clear_current_form;
916              
917 0           my $flags = 0;
918 0 0         if ($options{no_cache}) {
919 0           $flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
920             };
921 0 0         if (! exists $options{synchronize}) {
922 0           $options{synchronize} = $self->events;
923             };
924 0 0         if( !ref $options{synchronize}) {
925             $options{synchronize} = $options{synchronize}
926 0 0         ? $self->events
927             : []
928             };
929              
930             # If we don't have data, encode the parameters:
931 0 0         if( !$options{ data }) {
932 0           my $req= HTTP::Request::Common::POST( $url, $options{params} );
933 0           $options{ data } = $req->content;
934             };
935              
936 0   0       $options{ charset } ||= 'utf-8';
937 0   0       $options{ headers } ||= {};
938 0   0       $options{ headers }->{"Content-Type"} ||= "application/x-www-form-urlencoded";
939 0 0         if( $options{ charset }) {
940 0           $options{ headers }->{"Content-Type"} .= "; charset=$options{ charset }";
941             };
942              
943 0           my $streamPostData = $self->repl->declare(<<'JS');
944             function(headers, dataString) {
945             // POST method requests must wrap the encoded text in a MIME stream
946             const Cc = Components.classes;
947             const Ci = Components.interfaces;
948             var stringStream = Cc["@mozilla.org/io/string-input-stream;1"].
949             createInstance(Ci.nsIStringInputStream);
950             if ("data" in stringStream) // Gecko 1.9 or newer
951             stringStream.data = dataString;
952             else // 1.8 or older
953             stringStream.setData(dataString, dataString.length);
954              
955             var postData = Cc["@mozilla.org/network/mime-input-stream;1"].
956             createInstance(Ci.nsIMIMEInputStream);
957             for( h in headers ) {
958             postData.addHeader( h, headers[h] );
959             };
960             postData.addContentLength = true;
961             postData.setData(stringStream);
962              
963             return postData;
964             }
965             JS
966              
967             $self->_sync_call($options{synchronize}, sub {
968 0     0     my $postData = $streamPostData->($options{headers}, $options{data});
969 0           $b->loadURIWithFlags(''.$url, $flags, undef, $options{charset}, $postData);
970 0           });
971             }
972              
973             =head2 C<< $mech->add_header( $name => $value, ... ) >>
974              
975             $mech->add_header(
976             'X-WWW-Mechanize-Firefox' => "I'm using it",
977             Encoding => 'text/klingon',
978             );
979              
980             This method sets up custom headers that will be sent with B<every> HTTP(S)
981             request that Firefox makes.
982              
983             Using multiple instances of WWW::Mechanize::Firefox objects with the same
984             application together with changed request headers will most likely have weird
985             effects. So don't do that.
986              
987             Note that currently, we only support one value per header.
988              
989             Some versions of Firefox don't work with the method that is used to set
990             the custom headers. Please see the distribution's test suite for the exact
991             versions where the implemented mechanism doesn't work. Roughly, this is
992             for versions 17 to 24 of Firefox.
993              
994             =cut
995              
996             # This subroutine creates the custom header observer. It has a hashref
997             # of headers that it will add to EACH request that Firefox sends out.
998             # It removes itself when the Perl object gets destroyed.
999             sub _custom_header_observer {
1000 0     0     my ($self, @headers) = @_;
1001              
1002             # This routine was taken from http://d.hatena.ne.jp/oppara/20090410/p1
1003 0           my $on_modify_request = $self->repl->declare(<<'JS');
1004             function() { // headers passed via arguments
1005             const Cc= Components.classes;
1006             const Ci= Components.interfaces;
1007             const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
1008             var h= [].slice.call(arguments);
1009             var hr= {};
1010             for( var i=0; i<h.length; i+=2 ) {
1011             var k= h[i];
1012             var v= h[i+1];
1013             hr[k]= v;
1014             };
1015              
1016             var myObserver= {
1017             headers: hr,
1018             observe: function(subject,topic,data) {
1019             if(topic != 'http-on-modify-request') return;
1020              
1021             var http = subject.QueryInterface(Ci.nsIHttpChannel);
1022             for( var k in this.headers) {
1023             var v= this.headers[k];
1024             http.setRequestHeader(k,v, false);
1025              
1026             if (k== 'Referer' && http.referrer) {
1027             http.referrer.spec = v;
1028             };
1029             };
1030             }
1031             }
1032             observerService.addObserver(myObserver,'http-on-modify-request',false);
1033             return myObserver;
1034             };
1035             JS
1036 0           my $obs = $on_modify_request->(@headers);
1037              
1038             # Clean up after ourselves
1039 0           $obs->__release_action(<<'JS');
1040             const Cc= Components.classes;
1041             const Ci= Components.interfaces;
1042             const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
1043             try {
1044             observerService.removeObserver(self,'http-on-modify-request',false);
1045             } catch (e) {}
1046             JS
1047 0           return $obs;
1048             };
1049              
1050             sub add_header {
1051 0     0 1   my ($self, @headers) = @_;
1052 0   0       $self->{custom_header_observer} ||= $self->_custom_header_observer;
1053              
1054             # This is slooow, but we only do it when changing the headers...
1055 0           my $h = $self->{custom_header_observer}->{headers};
1056 0           while( my ($k,$v) = splice @headers, 0, 2 ) {
1057 0           $h->{$k} = $v;
1058             };
1059             };
1060              
1061             =head2 C<< $mech->delete_header( $name , $name2... ) >>
1062              
1063             $mech->delete_header( 'User-Agent' );
1064              
1065             Removes HTTP headers from the agent's list of special headers. Note
1066             that Firefox may still send a header with its default value.
1067              
1068             =cut
1069              
1070             sub delete_header {
1071 0     0 1   my ($self, @headers) = @_;
1072              
1073 0 0 0       if( $self->{custom_header_observer} and @headers ) {
1074             # This is slooow, but we only do it when changing the headers...
1075 0           my $h = $self->{custom_header_observer}->{headers};
1076              
1077             delete $h->{$_}
1078 0           for( @headers );
1079             };
1080             };
1081              
1082             =head2 C<< $mech->reset_headers >>
1083              
1084             $mech->reset_headers();
1085              
1086             Removes all custom headers and makes Firefox send its defaults again.
1087              
1088             =cut
1089              
1090             sub reset_headers {
1091 0     0 1   my ($self) = @_;
1092 0           delete $self->{custom_header_observer};
1093             };
1094              
1095             sub _addLoadEventListener {
1096 0     0     my ($self,%options) = @_;
1097              
1098 0   0       $options{ tab } ||= $self->tab;
1099 0   0       $options{ window } ||= $self->application->getMostRecentWindow;
1100 0   0       $options{ events } ||= $self->events;
1101 0           my $add_load_listener = $self->repl->declare(<<'JS');
1102             function( mainWindow, tab, waitForLoad, events ) {
1103             var browser= mainWindow.gBrowser.getBrowserForTab( tab );
1104              
1105             var lock= {
1106             "busy": 1,
1107             "log":[],
1108             "events": events,
1109             "browser": browser,
1110             "cb": undefined,
1111             "release": function() {
1112             for(var i=0; i<this.events.length; i++) {
1113             this.browser.removeEventListener(this.events[i], this.cb, true);
1114             };
1115             }
1116             };
1117             var unloadedFrames= [];
1118              
1119             lock.cb= function (e) {
1120             var t= e.target;
1121             var toplevel= (t == browser.contentDocument);
1122             lock.log.push("Event "+e.type);
1123             var reloadedFrame= false;
1124             lock.log.push( "" + unloadedFrames.length + " frames.");
1125              
1126             if( "FRAME" == t.tagName
1127             || "IFRAME" == t.tagName ) {
1128             loc= t.src;
1129             } else if( !t.tagName ) {
1130             // Document
1131             loc= t.URL;
1132             } else { // ignore
1133             lock.log.push("Ignoring " + e.type + " on " + t.tagName);
1134             };
1135             try {
1136             if( t instanceof HTMLDocument ) {
1137             // We are only interested in HTML pages here
1138             var container= t.defaultView.frameElement || browser.contentWindow;
1139             for( var i=0; i < unloadedFrames.length; i++ ) {
1140             try {
1141             // lock.log.push( "" + i + " " + unloadedFrames[i].id + " - " + unloadedFrames[i].src );
1142             reloadedFrame= reloadedFrame
1143             || unloadedFrames[i] === container;
1144             } catch (e) {
1145             // alert("Some frame element has gone away already...");
1146             };
1147             // alert("Caught " + e.type + " on remembered element. Great - " + reloadedFrame);
1148             };
1149              
1150             if ("pagehide" == e.type && container ) {
1151             // alert("pagehide on container /lock"+lock.id);
1152             // A frame or window gets reloaded.
1153             // A frame gets reloaded. We remember it so we can
1154             // tell when it has completed. We won't get a separate
1155             // completion event on the parent document :-(
1156             lock.log.push("Remembering frame parent, for 'load' event");
1157             unloadedFrames.push( container );
1158             // Maybe we should just attach all events here?!
1159             };
1160             };
1161             } catch (e) { alert("Error while looking: " + e.message+" " + e.line) };
1162              
1163             // if (! toplevel && !reloadedFrame ) { return ; };
1164             lock.log.push("<> " + e.type + " on " + loc);
1165              
1166             if( (reloadedFrame)
1167             // && !waitForLoad
1168             && "DOMContentLoaded" == e.type
1169             ) {
1170             // We loaded a document
1171             // See if it contains (i)frames
1172             // and wait for "load" to fire if so
1173             // alert("Reloaded a container /lock:" + lock.id);
1174             lock.log.push("DOMContentLoaded for toplevel");
1175             var q= "//IFRAME|//FRAME";
1176             var frames= t.evaluate(q,t,null,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ).snapshotLength;
1177             lock.log.push("Found " + frames + " frames");
1178             if( frames ) {
1179             lock.log.push("Waiting for 'load' because we found frames");
1180             waitForLoad= true;
1181             } else if( /^about:neterror\?/.test( loc ) || !waitForLoad ) {
1182             lock.log.push("Early out on DOMContentLoaded");
1183             lock.busy= 0;
1184             };
1185              
1186             } else if( (reloadedFrame)
1187             && ( "load" == e.type
1188             || "pageshow" == e.type
1189             )) { // We always are done on "load" on toplevel
1190             lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1191             lock.busy= 0;
1192              
1193             } else if( (toplevel || reloadedFrame)
1194             && ("error" == e.type || "stop" == e.type)) { // We always are done on "load" on toplevel
1195             lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1196             lock.busy= 0;
1197             };
1198              
1199             };
1200              
1201             for(var i=0; i<events.length; i++) {
1202             browser.addEventListener(events[i], lock.cb, true);
1203             };
1204             lock.log.push("Listening");
1205              
1206             return lock
1207             }
1208             JS
1209 0           return $add_load_listener->($options{ window }, $options{ tab }, 1, $options{ events } );
1210             }
1211              
1212             sub _addEventListener {
1213 0     0     my ($self,@args) = @_;
1214 0 0 0       if (@args <= 2 and ref($args[0]) eq 'MozRepl::RemoteObject::Instance') {
1215 0           @args = [@args];
1216             };
1217 0           for (@args) {
1218 0   0       $_->[1] ||= $self->events;
1219 0 0         $_->[1] = [$_->[1]]
1220             unless ref $_->[1];
1221             };
1222             # Now, flatten the arg list again...
1223 0           @args = map { @$_ } @args;
  0            
1224              
1225             # This registers multiple events for a one-shot event
1226 0           my $make_semaphore = $self->repl->declare(<<'JS');
1227             function() {
1228             var lock = { "busy": 0, "event" : null };
1229             var listeners = [];
1230             var pairs = arguments;
1231             for( var k = 0; k < pairs.length ; k++) {
1232             var b = pairs[k];
1233             k++;
1234             var events = pairs[k];
1235              
1236             for( var i = 0; i < events.length; i++) {
1237             var evname = events[i];
1238             var callback = (function(listeners,evname){
1239             return function(e) {
1240             if (! lock.busy) {
1241             lock.busy++;
1242             lock.event = e.type;
1243             lock.js_event = {};
1244             lock.js_event.target = e.originalTarget;
1245             lock.js_event.type = e.type;
1246             //alert("Caught first event " + e.type + " " + e.message);
1247             } else {
1248             //alert("Caught duplicate event " + e.type + " " + e.message);
1249             };
1250             for( var j = 0; j < listeners.length; j++) {
1251             listeners[j][0].removeEventListener(listeners[j][1],listeners[j][2],true);
1252             };
1253             };
1254             })(listeners,evname);
1255             listeners.push([b,evname,callback]);
1256             b.addEventListener(evname,callback,true);
1257             };
1258             };
1259             return lock
1260             }
1261             JS
1262             # $browser,$events
1263 0           return $make_semaphore->(@args);
1264             };
1265              
1266             sub _wait_while_busy {
1267 0     0     my ($self,@elements) = @_;
1268             # Now do the busy-wait
1269             # Should this also include a ->poll()
1270             # and a callback?
1271 0           my $i=0;
1272 0           while (1) {
1273 0           $i++;
1274 0 0         last if($i == 30 );
1275 0           for my $element (@elements) {
1276 0 0 0       if ((my $s = $element->{busy} || 0) < 1) {
1277 0           for my $element (@elements) {
1278 0           push @{ $self->{event_log} },
1279 0           join "\n", @{ $element->{log}};
  0            
1280             };
1281 0           return $element;
1282             };
1283             };
1284 0           sleep 0.1;
1285              
1286             # if (time-$timer > 4) {
1287             # $timer= time;
1288             # for my $element (@elements) {
1289             # for (@{ $element->{log}}) {
1290             # print $_,"\n";
1291             # };
1292             # print "---\n";
1293             # };
1294             # };
1295             };
1296             }
1297              
1298             =head2 C<< $mech->synchronize( $event, $callback ) >>
1299              
1300             Wraps a synchronization semaphore around the callback
1301             and waits until the event C<$event> fires on the browser.
1302             If you want to wait for one of multiple events to occur,
1303             pass an array reference as the first parameter.
1304              
1305             Usually, you want to use it like this:
1306              
1307             my $l = $mech->xpath('//a[@onclick]', single => 1);
1308             $mech->synchronize('DOMFrameContentLoaded', sub {
1309             $mech->click( $l );
1310             });
1311              
1312             It is necessary to synchronize with the browser whenever
1313             a click performs an action that takes longer and
1314             fires an event on the browser object.
1315              
1316             The C event is fired by Firefox when
1317             the whole DOM and all C