File Coverage

blib/lib/WWW/Mechanize/TreeBuilder.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package WWW::Mechanize::TreeBuilder;
2              
3             =head1 NAME
4              
5             WWW::Mechanize::TreeBuilder
6              
7             =head1 SYNOPSIS
8              
9             use Test::More tests => 2;
10             use Test::WWW::Mechanize;
11             use WWW::Mechanize::TreeBuilder;
12             # or
13             # use WWW::Mechanize;
14             # or
15             # use Test::WWW::Mechanize::Catalyst 'MyApp';
16              
17             my $mech = Test::WWW::Mechanize->new;
18             # or
19             #my $mech = Test::WWW::Mechanize::Catalyst->new;
20             # etc. etc.
21             WWW::Mechanize::TreeBuilder->meta->apply($mech);
22              
23             $mech->get_ok('/');
24             is( $mech->look_down(_tag => 'p')->as_trimmed_text, 'Some text', 'It worked' );
25              
26             =head1 DESCRIPTION
27              
28             This module combines L<WWW::Mechanize> and L<HTML::TreeBuilder>. Why? Because I've
29             seen too much code like the following:
30              
31             like($mech->content, qr{<p>some text</p>}, "Found the right tag");
32              
33             Which is just all flavours of wrong - its akin to processing XML with regexps.
34             Instead, do it like the following:
35              
36             ok($mech->look_down(_tag => 'p', sub { $_[0]->as_trimmed_text eq 'some text' })
37              
38             The anon-sub there is a bit icky, but this means that anyone should happen to
39             add attributes to the C<< <p> >> tag (such as an id or a class) it will still
40             work and find the right tag.
41              
42             All of the methods avaiable on L<HTML::Element> (that aren't 'private' - i.e.
43             that don't begin with an underscore) such as C<look_down> or C<find> are
44             automatically delegated to C<< $mech->tree >> through the magic of Moose.
45              
46             =head1 METHODS
47              
48             Everything in L<WWW::Mechanize> (or which ever sub class you apply it to) and
49             all public methods from L<HTML::Element> except those where WWW::Mechanize and
50             HTML::Element overlap. In the case where the two classes both define a method,
51             the one from WWW::Mechanize will be used (so that the existing behaviour of
52             Mechanize doesn't break.)
53              
54             =head1 USING XPATH OR OTHER SUBCLASSES
55              
56             L<HTML::TreeBuilder::XPath> allows you to use use xpath selectors to select
57             elements in the tree. You can use that module by providing parameters to the
58             moose role:
59              
60             with 'WWW::Mechanize::TreeBuilder' => {
61             tree_class => 'HTML::TreeBuilder::XPath'
62             };
63              
64             # or
65            
66             # NOTE: No hashref using this method
67             WWW::Mechanize::TreeBuilder->meta->apply($mech,
68             tree_class => 'HTML::TreeBuilder::XPath';
69             );
70              
71             and class will be automatically loaded for you. This class will be used to
72             construct the tree in the following manner:
73              
74             $tree = $tree_class->new_from_content($req->decoded_content)->elementify;
75              
76             You can also specify a C<element_class> parameter which is the (HTML::Element
77             sub)class that methods are proxied from. This module provides defaults for
78             element_class when C<tree_class> is "HTML::TreeBuilder" or
79             "HTML::TreeBuilder::XPath" - it will warn otherwise.
80              
81             =cut
82              
83 2     2   55157 use MooseX::Role::Parameterized;
  0            
  0            
84             use Moose::Util::TypeConstraints;
85             #use HTML::TreeBuilder;
86              
87             subtype 'WWW.Mechanize.TreeBuilder.LoadClass'
88             => as 'Str'
89             => where { Class::MOP::load_class($_) }
90             => message { "Cannot load class $_" };
91              
92             subtype 'WWW.Mechanize.TreeBuilder.TreeClass'
93             => as 'WWW.Mechanize.TreeBuilder.LoadClass'
94             => where { $_->isa('HTML::TreeBuilder') }
95             => message { "$_ isn't a subclass of HTML::TreeBuilder (or it can't be loaded)" };
96              
97             subtype 'WWW.Mechanize.TreeBuilder.ElementClass'
98             => as 'WWW.Mechanize.TreeBuilder.LoadClass',
99             => where { $_->isa('HTML::Element') }
100             => message { "$_ isn't a subclass of HTML::Element (or it can't be loaded)" };
101              
102             our $VERSION = '1.10003';
103              
104             parameter tree_class => (
105             isa => 'WWW.Mechanize.TreeBuilder.TreeClass',
106             required => 1,
107             default => 'HTML::TreeBuilder',
108             );
109              
110             parameter element_class => (
111             isa => 'WWW.Mechanize.TreeBuilder.ElementClass',
112             lazy => 1,
113             default => 'HTML::Element',
114             predicate => 'has_element_class'
115             );
116              
117             # Used if element_class is not provided to give sane defaults
118             our %ELEMENT_CLASS_MAPPING = (
119             'HTML::TreeBuilder' => 'HTML::Element',
120              
121             # HTML::TreeBuilder::XPath does it wrong.
122             #'HTML::TreeBuilder::XPath' => 'HTML::TreeBuilder::XPath::Node'
123             'HTML::TreeBuilder::XPath' => 'HTML::Element'
124             );
125              
126             role {
127             my $p = shift;
128              
129             my $tree_class = $p->tree_class;
130             my $ele_class;
131             unless ($p->has_element_class) {
132             $ele_class = $ELEMENT_CLASS_MAPPING{$tree_class};
133              
134             if (!defined( $ele_class ) ) {
135             local $Carp::Internal{'MooseX::Role::Parameterized::Meta::Role::Parameterizable'} = 1;
136             Carp::carp "WWW::Mechanize::TreeBuilder element_class not specified for overridden tree_class of $tree_class";
137             $ele_class = "HTML::Element";
138             }
139              
140             } else {
141             $ele_class = $p->element_class;
142             }
143              
144             requires '_make_request';
145              
146             has 'tree' => (
147             is => 'ro',
148             isa => $ele_class,
149             writer => '_set_tree',
150             predicate => 'has_tree',
151             clearer => 'clear_tree',
152              
153             # Since HTML::Element isn't a moose object, i have to 'list' everything I
154             # want it to handle myself here. how annoying. But since I'm lazy, I'll just
155             # take all subs from the symbol table that dont start with a _
156             handles => sub {
157             my ($attr, $delegate_class) = @_;
158              
159             my %methods = map { $_->name => 1
160             } $attr->associated_class->get_all_methods;
161              
162             return
163             map { $_->name => $_->name }
164             grep { my $n = $_->name; $n !~ /^_/ && !$methods{$n} }
165             $delegate_class->get_all_methods;
166             }
167             );
168              
169             around '_make_request' => sub {
170             my $orig = shift;
171             my $self = shift;
172             my $ret = $self->$orig(@_);
173              
174             # Someone needs to learn about weak refs
175             if ($self->has_tree) {
176             $self->tree->delete;
177             $self->clear_tree;
178             }
179              
180             if ($ret->content_type =~ m[^(text/html|application/(?:.*?\+)xml)]) {
181             $self->_set_tree( $tree_class->new_from_content($ret->decoded_content)->elementify );
182             }
183            
184             return $ret;
185             };
186              
187             sub DEMOLISH {
188             my $self = shift;
189             $self->tree->delete if $self->has_tree;
190             }
191              
192             };
193              
194             no Moose::Util::TypeConstraints;
195             no MooseX::Role::Parameterized;
196              
197             =head1 AUTHOR
198              
199             Ash Berlin C<< <ash@cpan.org> >>
200              
201             =head1 LICENSE
202              
203             Same as Perl 5.8, or at your option any later version of Perl.
204              
205             =cut
206              
207             1;