File Coverage

blib/lib/WWW/Mechanize/TreeBuilder.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package WWW::Mechanize::TreeBuilder;
2              
3             =head1 NAME
4              
5             WWW::Mechanize::TreeBuilder - combine WWW::Mechanize and HTML::TreeBuilder in nice ways
6              
7             =head1 SYNOPSIS
8              
9             use Test::More tests => 2;
10             use Test::WWW::Mechanize;
11             use WWW::Mechanize::TreeBuilder;
12             # or
13             # use WWW::Mechanize;
14             # or
15             # use Test::WWW::Mechanize::Catalyst 'MyApp';
16              
17             my $mech = Test::WWW::Mechanize->new;
18             # or
19             #my $mech = Test::WWW::Mechanize::Catalyst->new;
20             # etc. etc.
21             WWW::Mechanize::TreeBuilder->meta->apply($mech);
22              
23             $mech->get_ok('/');
24             is( $mech->look_down(_tag => 'p')->as_trimmed_text, 'Some text', 'It worked' );
25              
26             =head1 DESCRIPTION
27              
28             This module combines L<WWW::Mechanize> and L<HTML::TreeBuilder>. Why? Because I've
29             seen too much code like the following:
30              
31             like($mech->content, qr{<p>some text</p>}, "Found the right tag");
32              
33             Which is just all flavours of wrong - its akin to processing XML with regexps.
34             Instead, do it like the following:
35              
36             ok($mech->look_down(_tag => 'p', sub { $_[0]->as_trimmed_text eq 'some text' })
37              
38             The anon-sub there is a bit icky, but this means that anyone should happen to
39             add attributes to the C<< <p> >> tag (such as an id or a class) it will still
40             work and find the right tag.
41              
42             All of the methods available on L<HTML::Element> (that aren't 'private' - i.e.
43             that don't begin with an underscore) such as C<look_down> or C<find> are
44             automatically delegated to C<< $mech->tree >> through the magic of Moose.
45              
46             =head1 METHODS
47              
48             Everything in L<WWW::Mechanize> (or which ever sub class you apply it to) and
49             all public methods from L<HTML::Element> except those where WWW::Mechanize and
50             HTML::Element overlap. In the case where the two classes both define a method,
51             the one from WWW::Mechanize will be used (so that the existing behaviour of
52             Mechanize doesn't break.)
53              
54             =head1 USING XPATH OR OTHER SUBCLASSES
55              
56             L<HTML::TreeBuilder::XPath> allows you to use xpath selectors to select
57             elements in the tree. You can use that module by providing parameters to the
58             moose role:
59              
60             with 'WWW::Mechanize::TreeBuilder' => {
61             tree_class => 'HTML::TreeBuilder::XPath'
62             };
63              
64             # or
65            
66             # NOTE: No hashref using this method
67             WWW::Mechanize::TreeBuilder->meta->apply($mech,
68             tree_class => 'HTML::TreeBuilder::XPath';
69             );
70              
71             and class will be automatically loaded for you. This class will be used to
72             construct the tree in the following manner:
73              
74             $tree = $tree_class->new_from_content($req->decoded_content)->elementify;
75              
76             You can also specify a C<element_class> parameter which is the (HTML::Element
77             sub)class that methods are proxied from. This module provides defaults for
78             element_class when C<tree_class> is "HTML::TreeBuilder" or
79             "HTML::TreeBuilder::XPath" - it will warn otherwise.
80              
81             =cut
82              
83 2     2   37716 use MooseX::Role::Parameterized;
  0            
  0            
84             use Moose::Util::TypeConstraints;
85             use Class::Load 'load_class';
86             #use HTML::TreeBuilder;
87              
88             subtype 'WWW.Mechanize.TreeBuilder.LoadClass'
89             => as 'Str'
90             => where { load_class($_) }
91             => message { "Cannot load class $_" };
92              
93             subtype 'WWW.Mechanize.TreeBuilder.TreeClass'
94             => as 'WWW.Mechanize.TreeBuilder.LoadClass'
95             => where { $_->isa('HTML::TreeBuilder') }
96             => message { "$_ isn't a subclass of HTML::TreeBuilder (or it can't be loaded)" };
97              
98             subtype 'WWW.Mechanize.TreeBuilder.ElementClass'
99             => as 'WWW.Mechanize.TreeBuilder.LoadClass',
100             => where { $_->isa('HTML::Element') }
101             => message { "$_ isn't a subclass of HTML::Element (or it can't be loaded)" };
102              
103             our $VERSION = '1.20000';
104              
105             parameter tree_class => (
106             isa => 'WWW.Mechanize.TreeBuilder.TreeClass',
107             required => 1,
108             default => 'HTML::TreeBuilder',
109             );
110              
111             parameter element_class => (
112             isa => 'WWW.Mechanize.TreeBuilder.ElementClass',
113             lazy => 1,
114             default => 'HTML::Element',
115             predicate => 'has_element_class'
116             );
117              
118             # Used if element_class is not provided to give sane defaults
119             our %ELEMENT_CLASS_MAPPING = (
120             'HTML::TreeBuilder' => 'HTML::Element',
121              
122             # HTML::TreeBuilder::XPath does it wrong.
123             #'HTML::TreeBuilder::XPath' => 'HTML::TreeBuilder::XPath::Node'
124             'HTML::TreeBuilder::XPath' => 'HTML::Element'
125             );
126              
127             role {
128             my $p = shift;
129              
130             my $tree_class = $p->tree_class;
131             my $ele_class;
132             unless ($p->has_element_class) {
133             $ele_class = $ELEMENT_CLASS_MAPPING{$tree_class};
134              
135             if (!defined( $ele_class ) ) {
136             local $Carp::Internal{'MooseX::Role::Parameterized::Meta::Role::Parameterizable'} = 1;
137             Carp::carp "WWW::Mechanize::TreeBuilder element_class not specified for overridden tree_class of $tree_class";
138             $ele_class = "HTML::Element";
139             }
140              
141             } else {
142             $ele_class = $p->element_class;
143             }
144              
145             requires '_make_request';
146              
147             has 'tree' => (
148             is => 'ro',
149             isa => $ele_class,
150             writer => '_set_tree',
151             predicate => 'has_tree',
152             clearer => 'clear_tree',
153              
154             # Since HTML::Element isn't a moose object, i have to 'list' everything I
155             # want it to handle myself here. how annoying. But since I'm lazy, I'll just
156             # take all subs from the symbol table that don't start with a _
157             handles => sub {
158             my ($attr, $delegate_class) = @_;
159              
160             my %methods = map { $_->name => 1
161             } $attr->associated_class->get_all_methods;
162              
163             # Never delegate the 'import' method
164             $methods{import} = 1;
165              
166             return
167             map { $_->name => $_->name }
168             grep { my $n = $_->name; $n !~ /^_/ && !$methods{$n} }
169             $delegate_class->get_all_methods;
170             }
171             );
172              
173             around '_make_request' => sub {
174             my $orig = shift;
175             my $self = shift;
176             my $ret = $self->$orig(@_);
177              
178             # Someone needs to learn about weak refs
179             if ($self->has_tree) {
180             $self->tree->delete;
181             $self->clear_tree;
182             }
183              
184             if ($ret->content_type =~ m[^(text/html|application/(?:.*?\+)xml)]) {
185             $self->_set_tree( $tree_class->new_from_content($ret->decoded_content)->elementify );
186             }
187            
188             return $ret;
189             };
190              
191             sub DESTROY {}
192              
193             after DESTROY => sub {
194             my $self = shift;
195             $self->tree->delete if ($self->has_tree && $self->tree);
196             };
197              
198             };
199              
200             no Moose::Util::TypeConstraints;
201             no MooseX::Role::Parameterized;
202              
203             =head1 AUTHOR
204              
205             Ash Berlin C<< <ash@cpan.org> >>
206              
207             =head1 LICENSE
208              
209             Same as Perl 5.8, or at your option any later version of Perl.
210              
211             =cut
212              
213             1;