File Coverage

blib/lib/WWW/HtmlUnit.pm
Criterion Covered Total %
statement 13 32 40.6
branch 2 6 33.3
condition n/a
subroutine 3 6 50.0
pod 1 3 33.3
total 19 47 40.4


line stmt bran cond sub pod time code
1             package WWW::HtmlUnit;
2              
3             =head1 NAME
4              
5             WWW::HtmlUnit - Inline::Java based wrapper of the HtmlUnit v2.14 library
6              
7             =head1 SYNOPSIS
8              
9             use WWW::HtmlUnit;
10             my $webClient = WWW::HtmlUnit->new;
11             my $page = $webClient->getPage("http://google.com/");
12             my $f = $page->getFormByName('f');
13             my $submit = $f->getInputByName("btnG");
14             my $query = $f->getInputByName("q");
15             $page = $query->type("HtmlUnit");
16             $page = $query->type("\n");
17              
18             my $content = $page->asXml;
19             print "Result:\n$content\n\n";
20              
21             =head1 DESCRIPTION
22              
23             This is a wrapper around the HtmlUnit library. It includes the HtmlUnit jar itself and its dependencies. All this library really does is find the jars and load them up using L<Inline::Java>.
24              
25             The reason all this is interesting? HtmlUnit has very good javascript support, so you can automate, scrape, or test javascript-required websites.
26              
27             See especially the HtmlUnit documentation on their site for deeper API documentation, L<http://htmlunit.sourceforge.net/apidocs/>.
28              
29             =head1 INSTALLING
30              
31             There is one special thing that I've run into when installing L<Inline::Java>, and thus L<WWW::HtmlUnit>, which is telling the installer where to find your java home. It turns out this is really really easy: just define the JAVA_HOME environment variable before you start your CPAN shell / installer. From Debian/Ubuntu, I do:
32              
33             sudo apt-get install default-jdk
34             sudo JAVA_HOME=/usr/lib/jvm/default-java cpanm WWW::HtmlUnit
35              
36             and everything works the way I want!
37              
38             =head1 DOCUMENTATION
39              
40             You can get the bulk of the documentation directly from the L<HtmlUnit apidoc site|http://htmlunit.sourceforge.net/apidocs/>. Since WWW::HtmlUnit is mostly a wrapper around the real Java API, what you actually have to do is translate some of the Java notation into Perl notation. Mostly this is replacing '.' with '->'.
41              
42             Key classes that you might want to look at:
43              
44             =over 4
45              
46             =item WebClient (C<com.gargoylesoftware.htmlunit.WebClient>)
47              
48             Represents a web browser. This is what C<< WWW::HtmlUnit->new >> returns.
49              
50             =item HtmlPage (C<com.gargoylesoftware.htmlunit.html.HtmlPage>)
51              
52             A single HTML Page.
53              
54             =item HtmlElement (C<com.gargoylesoftware.htmlunit.html.HtmlElement>)
55              
56             An individual HTML element (node).
57              
58             =back
59              
60             Also see L<WWW::HtmlUnit::Sweet> for a way to pretend that HtmlUnit works a little like L<WWW::Mechanize>, but not really.
61              
62             =cut
63              
64 4     4   62559 use strict;
  4         9  
  4         147  
65 4     4   26 use warnings;
  4         6  
  4         2088  
66              
67             our $VERSION = '0.22';
68              
69             sub find_jar_path {
70 0     0 0 0 my $self = shift;
71 0         0 my $path = $INC{'WWW/HtmlUnit.pm'};
72 0         0 $path =~ s/\.pm$/\/jar/;
73 0         0 return $path;
74             }
75              
76             our $classpath_separator = $^O =~ /win/i ? ";" : ":";
77             sub collect_default_jars {
78 0     0 0 0 my $jar_path = find_jar_path();
79 0         0 return join $classpath_separator, map { "$jar_path/$_" } qw(
  0         0  
80             commons-codec-1.9.jar
81             commons-collections-3.2.1.jar
82             commons-io-2.4.jar
83             commons-lang3-3.2.1.jar
84             commons-logging-1.1.3.jar
85             cssparser-0.9.13.jar
86             htmlunit-2.14.jar
87             htmlunit-confirmhandler-2.8.jar
88             htmlunit-core-js-2.14.jar
89             httpclient-4.3.2.jar
90             httpcore-4.3.1.jar
91             httpmime-4.3.2.jar
92             jetty-http-8.1.14.v20131031.jar
93             jetty-io-8.1.14.v20131031.jar
94             jetty-util-8.1.14.v20131031.jar
95             jetty-websocket-8.1.14.v20131031.jar
96             nekohtml-1.9.20.jar
97             sac-1.3.jar
98             serializer-2.7.1.jar
99             xalan-2.7.1.jar
100             xercesImpl-2.11.0.jar
101             xml-apis-1.4.01.jar
102             );
103             }
104              
105             =head1 MODULE IMPORT PARAMETERS
106              
107             In general, any parameters you pass while importing ('use'-ing) L<WWW::HtmlUnit> will be passed on to L<Inline::Java>. A handy one is the 'DIRECTORY' parameter, for example. A few parameters are handled specially, however.
108              
109             If you need to include extra .jar files, and/or if you want to study more java classes, you can do:
110              
111             use WWW::HtmlUnit
112             jars => ['/path/to/blah.jar'],
113             study => ['class.to.study'];
114              
115             The extra jars are appended to the classpath that L<Inline::Java> autostudies, and the extra classes are added to the list that L<Inline::Java> studies immediately. A class must be on the study list to be directly instantiated.
116              
117             Whether you ask for it or not, WebClient, BrowserVersion, util.Cookie, CollectingAlertHandler, and ClickConfirmHandler (all under the com.gargoylesoftware.htmlunit package) are studied. You can get to studied classes by adding WWW::HtmlUnit:: to their package name and replacing each '.' with '::'. So, you could make a cookie like this:
118              
119             my $cookie = WWW::HtmlUnit::com::gargoylesoftware::htmlunit::util::Cookie->new($name, $value);
120             $webClient->getCookieManager->addCookie($cookie);
121              
122             Which is, incidentally, just the sort of thing that I should wrap in WWW::HtmlUnit::Sweet or elsewhere, 'cause that is UGLY!
123              
124             =cut
125              
126             sub import {
127 4     4   22 my $class = shift;
128 4         11 my %parameters = @_;
129 4         7 my $custom_jars = "";
130 4 50       20 if ($parameters{'jars'}) {
131 0         0 $custom_jars = join($classpath_separator, @{$parameters{'jars'}});
  0         0  
132 0         0 delete $parameters{'jars'};
133             }
134              
135 4         10 my @STUDY = (
136             'com.gargoylesoftware.htmlunit.WebClient',
137             'com.gargoylesoftware.htmlunit.BrowserVersion',
138             'com.gargoylesoftware.htmlunit.util.Cookie',
139             'com.gargoylesoftware.htmlunit.CollectingAlertHandler',
140             'com.gargoylesoftware.htmlunit.ClickConfirmHandler',
141             );
142 4 50       13 if ($parameters{'study'}) {
143 0         0 push(@STUDY, @{$parameters{'study'}});
  0         0  
144 0         0 delete $parameters{'study'};
145             }
146              
147 4         5038 require Inline;
148 0           Inline->import(
149             Java => 'STUDY',
150             STUDY => \@STUDY,
151             AUTOSTUDY => 1,
152             CLASSPATH => collect_default_jars() . $classpath_separator . $custom_jars,
153             %parameters
154             );
155             }
156              
157             =head1 METHODS
158              
159             =head2 $webClient = WWW::HtmlUnit->new($browser_name)
160              
161             This is just a shortcut for
162              
163             $webClient = WWW::HtmlUnit::com::gargoylesoftware::htmlunit::WebClient->new;
164              
165             The optional $browser_name allows you to specify which browser version to pass to the WebClient->new method. You could pass "FIREFOX_3" for example, to make the engine especially try to emulate Firefox 3 quirks, I imagine.
166              
167             =cut
168              
169             sub new {
170 0     0 1   my ($class, $version) = @_;
171 0 0         if($version) {
172 0           my $browser_version = eval "\$WWW::HtmlUnit::com::gargoylesoftware::htmlunit::BrowserVersion::$version";
173 0           return WWW::HtmlUnit::com::gargoylesoftware::htmlunit::WebClient->new($browser_version);
174             } else {
175 0           return WWW::HtmlUnit::com::gargoylesoftware::htmlunit::WebClient->new;
176             }
177             }
178              
179             =head1 DEPENDENCIES
180              
181             When installed using the CPAN shell, all dependencies besides java itself will be installed. This includes the HtmlUnit jar files, and in fact those files make up the bulk of the distribution, byte-wise.
182              
183             =head1 TIPS
184              
185             =head2 Working with java list/collections
186              
187             When you get a java list, it is actually an object-thingie. You gotta call C<< ->toArray() >> on it, and then you'll get a lovely perl arrayref, which is most likely what you wanted in the first place. I am open to suggestions for a mass work-around for this.
188              
189              
190             =head2 HTTP Authentication
191              
192             my $credentialsProvider = $webclient->getCredentialsProvider;
193             $credentialsProvider->addCredentials($username, $password);
194              
195             =head2 Disable SSL certificate checking
196              
197             $webclient->setUseInsecureSSL(1);
198              
199             =head2 Handling alerts and confirmations
200              
201             We (thanks lungching!) wrote a wee bit of java to make this easy. Though I admit that it could be a bit more... perlish. For a full example, see L.
202              
203             my $alert_handler = WWW::HtmlUnit::com::gargoylesoftware::htmlunit::CollectingAlertHandler->new();
204             $webClient->setAlertHandler($alert_handler);
205             # ...
206             my $alert_arrayref = $alert_handler->getCollectedAlerts->toArray();
207              
208             =head1 TODO
209              
210             =over 4
211              
212             =item * Capture HtmlUnit output to a variable
213              
214             =item * Use that to have a quiet-mode
215              
216             =item * Document lungching's confirmation handler code, automate build
217              
218             =back
219              
220             =head1 SEE ALSO
221              
222             L<WWW::HtmlUnit::Sweet>, L<Inline::Java>, L<http://htmlunit.sourceforge.net/>
223              
224             =head1 AUTHOR
225              
226             Brock Wilcox - http://thelackthereof.org/
227              
228             =head1 COPYRIGHT
229              
230             Copyright (c) 2009-2014 Brock Wilcox. All rights
231             reserved. This program is free software; you can redistribute it and/or
232             modify it under the same terms as Perl itself.
233              
234             HtmlUnit library includes the following copyright:
235              
236             /*
237             * Copyright (c) 2002-2014 Gargoyle Software Inc.
238             *
239             * Licensed under the Apache License, Version 2.0 (the "License");
240             * you may not use this file except in compliance with the License.
241             * You may obtain a copy of the License at
242             * http://www.apache.org/licenses/LICENSE-2.0
243             *
244             * Unless required by applicable law or agreed to in writing, software
245             * distributed under the License is distributed on an "AS IS" BASIS,
246             * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
247             * See the License for the specific language governing permissions and
248             * limitations under the License.
249             */
250              
251             =cut
252              
253             1;
254