File Coverage

blib/lib/HTML/DOM.pm
Criterion Covered Total %
statement 436 441 98.8
branch 179 198 90.4
condition 85 109 77.9
subroutine 106 106 100.0
pod 55 59 93.2
total 861 913 94.3


line stmt bran cond sub pod time code
1             package HTML::DOM;
2              
3             # If you are looking at the source code (which you are obviously doing
4             # if you are reading this), note that '# ~~~' is my way of marking
5             # something to be done still (except in this sentence).
6              
7              
8 24     24   186286 use 5.008003;
  24         54  
9              
10 24     24   78 use strict;
  24         30  
  24         357  
11 24     24   71 use warnings;
  24         26  
  24         523  
12              
13 24     24   67 use Carp 'croak';
  24         26  
  24         1033  
14 24     24   10309 use HTML::DOM::Element;
  24         57  
  24         1085  
15 24     24   109 use HTML::DOM::Exception 'NOT_SUPPORTED_ERR';
  24         25  
  24         821  
16 24     24   84 use HTML::DOM::Node 'DOCUMENT_NODE';
  24         24  
  24         710  
17 24     24   73 use Scalar::Util 'weaken';
  24         21  
  24         688  
18 24     24   87 use URI;
  24         22  
  24         2590  
19              
20             our $VERSION = '0.057';
21             our @ISA = 'HTML::DOM::Node';
22              
23             require HTML::DOM::Collection;
24             require HTML::DOM::Comment;
25             require HTML::DOM::DocumentFragment;
26             require HTML::DOM::Implementation;
27             require HTML::DOM::NodeList::Magic;
28             require HTML::DOM::Text;
29             require HTML::Tagset;
30             require HTML::DOM::_TreeBuilder;
31              
32             use overload fallback => 1,
33             '%{}' => sub {
34 20304     20304   23094 my $self = shift;
35             #return $self; # for debugging
36 20304 100 100     87113 $self->isa(scalar caller) || caller->isa('HTML::DOM::_TreeBuilder')
37             and return $self;
38 3         12 $self->forms;
39 24     24   84 };
  24         24  
  24         135  
40              
41              
42             =head1 NAME
43              
44             HTML::DOM - A Perl implementation of the HTML Document Object Model
45              
46             =head1 VERSION
47              
48             Version 0.057 (alpha)
49              
50             B This module is still at an experimental stage. The API is
51             subject to change without
52             notice.
53              
54             =head1 SYNOPSIS
55              
56             use HTML::DOM;
57            
58             my $dom_tree = new HTML::DOM; # empty tree
59             $dom_tree->write($source_code);
60             $dom_tree->close;
61            
62             my $other_dom_tree = new HTML::DOM;
63             $other_dom_tree->parse_file($filename);
64            
65             $dom_tree->getElementsByTagName('body')->[0]->appendChild(
66             $dom_tree->createElement('input')
67             );
68            
69             print $dom_tree->innerHTML, "\n";
70              
71             my $text = $dom_tree->createTextNode('text');
72             $text->data; # get attribute
73             $text->data('new value'); # set attribute
74            
75             =head1 DESCRIPTION
76              
77             This module implements the HTML Document Object Model by extending the
78             HTML::Tree modules. The HTML::DOM class serves both as an HTML parser and
79             as the document class.
80              
81             The following DOM modules are currently supported:
82              
83             Feature Version (aka level)
84             ------- -------------------
85             HTML 2.0
86             Core 2.0
87             Events 2.0
88             UIEvents 2.0
89             MouseEvents 2.0
90             MutationEvents 2.0
91             HTMLEvents 2.0
92             StyleSheets 2.0
93             CSS 2.0 (partially)
94             CSS2 2.0
95             Views 2.0
96              
97             StyleSheets, CSS and CSS2 are actually provided by L. This list
98             corresponds to CSS::DOM versions 0.02 to 0.14.
99              
100             =for comment
101             Level 2 interfaces not yet included: Range, Traversal
102              
103             =head1 METHODS
104              
105             =head2 Construction and Parsing
106              
107             =over 4
108              
109             =item $tree = new HTML::DOM %options;
110              
111             This class method constructs and returns a new HTML::DOM object. The
112             C<%options>, which are all optional, are as follows:
113              
114             =over 4
115              
116             =item url
117              
118             The value that the C method will return. This value is also used by
119             the C method.
120              
121             =item referrer
122              
123             The value that the C method will return
124              
125             =item response
126              
127             An HTTP::Response object. This will be used for information needed for
128             writing cookies. It is expected to have a reference to a request object
129             (accessible via its C method--see L). Passing a
130             parameter to the 'cookie' method will be a no-op
131             without this.
132              
133             =item weaken_response
134              
135             If this is passed a true value, then the HTML::DOM object will hold a weak
136             reference to the response.
137              
138             =item cookie_jar
139              
140             An HTTP::Cookies object. As with C, if you omit this, arguments
141             passed to the
142             C method will be ignored.
143              
144             =item charset
145              
146             The original character set of the document. This does not affect parsing
147             via the C method (which always assumes Unicode). C will
148             use this, if specified, or L otherwise.
149             L's C method uses this to encode form data
150             unless the form has a valid 'accept-charset' attribute.
151              
152             =back
153              
154             If C and C are omitted, they can be inferred from
155             C.
156              
157             =cut
158              
159             {
160             # This HTML::DOM::Element::HTML package represents the
161             # documentElement. It inherits from
162             # HTML::DOM::_TreeBuilder and acts
163             # as the parser. It is also used as a parser for innerHTML.
164              
165             # Note for potential developers: You can’t refer to ->parent in
166             # this package and expect it to provide the document, because
167             # that’s not the case with innerHTML. Use ->ownerDocument.
168             # Use ->parent only to distinguish between innerHTML and
169             # the regular parser.
170              
171             # Concerning magic associations between forms and fields: To cope
172             # with bad markup, an implicitly closed form (with no end tag) is
173             # associated with any form fields that occur after that are not
174             # inside any form. So when a start tag for a form is encountered,
175             # we make that the ‘current form’, by pushing it on to
176             # @{ $$self{_HTML_DOM_cf} }. When the element is closed, if it
177             # is closed by an end tag, we simply pop it off the cf array. If
178             # it is implicitly closed we pop it off and also make it the
179             # ‘magic form’ (_HTML_DOM_mg_f). When we encounter a form field,
180             # we give it a magic association with the form if the cf
181             # stack is empty.
182              
183              
184             package HTML::DOM::Element::HTML;
185             our @ISA = qw' HTML::DOM::Element HTML::DOM::_TreeBuilder';
186              
187 24     24   2026 use Scalar::Util qw 'weaken isweak';
  24         26  
  24         31747  
188              
189             # I have to override this so it doesn't delete _HTML_DOM_* attri-
190             # butes and so that it doesn’t rebless the object.
191             sub elementify {
192 93     93   107 my $self = shift;
193 93 100       2721 my %attrs = map /^[a-z_]*\z/ ? () : ($_ => $self->{$_}),
194             keys %$self;
195 93         1384 my @weak = grep isweak $self->{$_}, keys %$self;
196 93         409 $self->SUPER::elementify;
197 93         649 %$self = (%$self, %attrs); # this invigorates feeble refs
198 93         555 weaken $self->{$_} for @weak;
199             }
200              
201             sub new {
202 147     147   173 my $tb; # hafta declare it separately so the closures can
203             # c it
204             ($tb = shift->HTML::DOM::_TreeBuilder::new(
205             element_class => 'HTML::DOM::Element',
206             'tweak_~text' => sub {
207 547     547   601 my ($text, $parent) = @_;
208             # $parent->ownerDocument will be undef if
209             # $parent is the doc.
210 547   33     1243 $parent->splice_content( -1,1,
211             ($parent->ownerDocument || $parent)
212             ->createTextNode($text) );
213             $parent->content_offset(
214             $$tb{_HTML_DOM_tb_c_offset}
215 547         1452 );
216             },
217             'tweak_*' => sub {
218 807     807   872 my($elem, $tag, $doc_elem) = @_;
219 807 100       1347 $tag =~ /^~/ and return;
220              
221 800 100       1173 if(
222             $tag eq 'link'
223             ) {
224 16         33 HTML'DOM'Element'Link'_reset_style_sheet(
225             $elem
226             );
227             }
228              
229             # If a form is being closed, determine
230             # whether it is closed implicitly and set
231             # the current form and magic form
232             # accordingly.
233 800 100       1092 if($tag eq 'form') {
234             pop
235 40 50       35 @{$$doc_elem{_HTML_DOM_cf}||[]};
  40         104  
236             delete $$doc_elem{_HTML_DOM_etif}
237             or $$doc_elem{_HTML_DOM_mg_f}
238 40 100       101 = $elem
239             }
240              
241             # If a formie is being closed, create a
242             # magic association where appropriate.
243 800 100 100     2984 if(!$$doc_elem{_HTML_DOM_no_mg}
      66        
      66        
      100        
244             and $tag =~ /^(?:
245             button|(?:
246             fieldse|inpu|(?:obj|sel)ec
247             )t|label|textarea
248             )\z/x
249             and $$doc_elem{_HTML_DOM_mg_f}
250             and !$$doc_elem{_HTML_DOM_cf}
251             ||!@{$$doc_elem{_HTML_DOM_cf}}) {
252             $elem->form(
253             $$doc_elem{_HTML_DOM_mg_f}
254 9         27 );
255 9         19 $doc_elem->ownerDocument->
256             magic_forms(1);
257             }
258              
259             my $event_offsets = delete
260             $elem->{_HTML_DOM_tb_event_offsets}
261 800 100       1994 or return;
262 4         11 _create_events(
263             $doc_elem, $elem, $event_offsets
264             );
265             },
266 147         1159 ))
267             ->ignore_ignorable_whitespace(0); # stop eof()'s cleanup
268 147         394 $tb->store_comments(1); # from changing an
269 147         398 $tb->unbroken_text(1); # necessary, con- # elem_han-
270             # sidering what # dler's view
271             # _tweak_~text does # of the tree
272              
273             # Web browsers preserve whitespace, at least from the point
274             # of view of the DOM; but the main reason we are using this
275             # option is that a parser for innerHTML doesn’t know
276             # whether the nodes will be inserted in a
. 
277 147         339 no_space_compacting $tb 1;
278              
279 147         694 $tb->handler(text => "text", # so we can get line
280             "self, text, is_cdata, offset"); # numbers for scripts
281 147         541 $tb->handler(start => "start",
282             "self, tagname, attr, attrseq, offset, tokenpos");
283 147         778 $tb->handler((declaration=>)x2,'self,tagname,tokens,text');
284              
285 147         499 $tb->{_HTML_DOM_tweakall} = $tb->{'_tweak_*'};
286              
287 147         220 my %opts = @_;
288 147         195 $tb->{_HTML_DOM_no_mg} = delete $opts{no_magic_forms};
289             # used by an element’s innerHTML
290              
291             # We have to copy it like this, because our circular ref-
292             # erence is thus: $tb -> object -> closure -> $tb
293             # We can’t weaken $tb without a copy of it, because it is
294             # the only reference to the object.
295 147         151 my $life_raft = $tb; weaken $tb; $tb;
  147         218  
  147         441  
296             }
297              
298             sub start {
299 594 100   594   1301 return shift->SUPER::start(@_) if @_ < 6; # shirt-çorcuit
300            
301 589         521 my $tokenpos = pop;
302 589         479 my $offset = pop;
303 589         496 my %event_offsets;
304 589         440 my $attr_names = pop;
305 589         1280 for(0..$#$attr_names) {
306             $$attr_names[$_] =~ /^on(.*)/is
307 535 100       1554 and $event_offsets{$1} =
308             $$tokenpos[$_*4 + 4] + $offset;
309             }
310              
311 589         1472 my $elem = (my $self = shift)->SUPER::start(@_);
312            
313 589 100 100     1072 $_[0] eq 'form' and push @{ $$self{_HTML_DOM_cf} ||= [] },
  40         161  
314             $elem;
315              
316 589 100       4453 return $elem unless %event_offsets;
317              
318 5 100       13 if(!$HTML::Tagset::emptyElement{$_[0]}) { # container
319             $$elem{_HTML_DOM_tb_event_offsets} =
320 4         9 \%event_offsets;
321             } else {
322 1         3 _create_events(
323             $self,
324             $elem,
325             \%event_offsets,
326             );
327             }
328              
329 5         75 return $elem;
330             }
331              
332             sub _create_events {
333 5     5   8 my ($doc_elem,$elem,$event_offsets) = @_;
334 5 100       15 defined(my $event_attr_handler =
335             $doc_elem->ownerDocument->event_attr_handler)
336             or return;
337 3         7 for(keys %$event_offsets) {
338             my $l =
339             &$event_attr_handler(
340             $elem,
341             $_,
342             $elem->attr("on$_"),
343 3         19 $$event_offsets{$_}
344             );
345 3 50       414 defined $l and
346             $elem->event_handler (
347             $_, $l
348             );
349             }
350             }
351              
352             sub text {
353 547     547   720 $_[0]{_HTML_DOM_tb_c_offset} = pop;
354 547         1239 shift->SUPER::text(@_)
355             }
356              
357             sub insert_element {
358 860     860   1180 my ($self, $tag) = (shift, @_);
359 860 100 100     2750 if((ref $tag ? $tag->tag : $tag) eq 'tr'
    100          
360             and $self->pos->tag eq 'table') {
361 12         32 $self->insert_element('tbody', 1);
362             }
363 860         1912 $self->SUPER::insert_element(@_);
364             }
365              
366             sub end {
367 601     601   575 my $self = shift;
368              
369             # If this is a form, record that we’ve seen an end tag, so
370             # that this does not become a ‘magic form’.
371             ++$$self{_HTML_DOM_etif} # end tag is 'form'
372 601 100       1129 if $_[0] eq 'form';
373              
374             # Make sure cannot close a cell outside the cur-
375             # rent table.
376 601 100       1188 $_[0] =~ /^t[hd]\z/ and @_ = (\$_[0], 'table');
377              
378             # HTML::TreeBuilder expects the element to be the
379             # topmost element, and gets confused when it’s inside the
380             # ~doc. It sets _pos to the doc when it encounters .
381             # This works around that.
382 601         569 my $pos = $self->{_pos};
383 601         1380 my @ret = $self->SUPER::end(@_);
384             $self->{_pos} = $pos
385 600 100 100     1650 if ($self->{_pos}||return @ret)->{_tag} eq '~doc';
386 561         1827 @ret; # TB relies on this retval
387             }
388              
389             sub declaration {
390 9     9   19 my($self,$tagname,$tokens,$source) = @_;
391             return
392 9 100 66     63 unless $tagname eq 'doctype'
393             and my $parent = $self->parent;
394             package HTML::DOM; # bypass overloading
395             $parent->{_HTML_DOM_doctype} = $source
396 8 50       15 unless defined $parent->{_HTML_DOM_doctype};
397 8 100       26 return unless @$tokens > 3;
398 7         22 for ($self->{_HTML_DOM_version} = $tokens->[3]){
399 7 50       81 s/^['"]// and s/['"]\z//;
400             }
401             }
402              
403 1147     1147   3119 sub element_class { 'HTML::DOM::Element' }
404              
405             # HTMLHtmlElement interface
406 5     5   412 sub version { shift->_attr('version' => @_) }
407              
408             } # end of special TreeBuilder package
409              
410             sub new {
411 94     94 1 2358620 my $self = shift->SUPER::new('~doc');
412              
413 94         201 my %opts = @_;
414 94         1786 $self->{_HTML_DOM_url} = $opts{url}; # might be undef
415 94         327 $self->{_HTML_DOM_referrer} = $opts{referrer}; # might be undef
416 94 100       258 if($opts{response}) {
417 10         13 $self->{_HTML_DOM_response} = $opts{response};
418 10 100       16 if(!defined $self->{_HTML_DOM_url}) {{
419 8         9 $self->{_HTML_DOM_url} =
420             ($opts{response}->request || last)
421 8   100     26 ->url;
422             }}
423 10 100       45 if(!defined $self->{_HTML_DOM_referrer}) {{
424 8         12 $self->{_HTML_DOM_referrer} =
425             ($opts{response}->request || last)
426 8   100     17 ->header('Referer')
427             }}
428 10 100       52 if($opts{weaken_response}) {
429             weaken $self->{_HTML_DOM_response}
430 1         1 }
431             }
432 94         141 $self->{_HTML_DOM_jar} = $opts{cookie_jar}; # might be undef
433 94         163 $self->{_HTML_DOM_cs} = $opts{charset};
434              
435 94         394 $self;
436             }
437              
438             =item $tree->elem_handler($elem_name => sub { ... })
439              
440             If you call this method first, then, when the DOM tree is in the
441             process of
442             being built (as a result of a call to C or C), the
443             subroutine will be called after each C<$elem_name> element
444             is
445             added to the tree. If you give '*' as the element name, the subroutine
446             will be called for each element that does not have a handler. The
447             subroutine's
448             two arguments will be the tree itself
449             and the element in question. The subroutine can call the DOM object's
450             C
451             method to insert HTML code into the source after the element.
452              
453             Here is a lame example (which does not take Content-Script-Type headers
454             or security into account):
455              
456             $tree->elem_handler(script => sub {
457             my($document,$elem) = @_;
458             return unless $elem->attr('type') eq 'application/x-perl';
459             eval($elem->firstChild->data);
460             });
461              
462             $tree->write(
463             '

The time is

464            
467             precisely.
468            

'
469             );
470             $tree->close;
471              
472             print $tree->documentElement->as_text, "\n";
473              
474             (Note: L's
475             L|HTML::DOM::Element/content_offset> method might come in
476             handy for reporting line numbers for script errors.)
477              
478             =cut
479              
480             sub elem_handler {
481 133     133 1 902 my ($self,$elem_name,$sub) = @_;
482              
483             # ~~~ temporary; for internal use only:
484 133 100       332 @_ < 3 and return $$self{_HTML_DOM_nih}{$elem_name};
485              
486 9         19 $$self{_HTML_DOM_nih}{$elem_name} = $sub; # nih = node inser-
487             # tion handler
488             my $h = $self->{_HTML_DOM_elem_handlers}{$elem_name} = sub {
489             # I can’t put $doc_elem outside the closure, because
490             # ->open replaces it with another object, and we’d be
491             # referring to the wrong one.
492 16     16   24 my $doc_elem = $_[2];
493 16         33 $doc_elem->{_HTML_DOM_tweakall}->(@_);
494 16         47 $self->_modified; # in case there are node lists hanging
495             # around that the handler references
496 16         62 &$sub($self, $_[0]);
497              
498             # See the comment in sub write.
499 15         353 (my $level = $$self{_HTML_DOM_buffered});
500 15 50 66     89 if( $level
      66        
      66        
501             and ($level -= 1, 1)
502             and $$self{_HTML_DOM_p}
503             and $$self{_HTML_DOM_p}[$level]
504             ) {
505 7         13 $$self{_HTML_DOM_p}[$level]->eof;
506             $level
507 1         3 ? --$#{$$self{_HTML_DOM_p}}
508 7 100       19 : delete $$self{_HTML_DOM_p};
509             }
510 9         40 };
511 9 100       19 if(my $p = $$self{_HTML_DOM_parser}) {
512 1         4 $$p{"_tweak_$elem_name"} = $h
513             }
514 9         29 weaken $self;
515 9         17 return;
516             }
517              
518              
519             =item css_url_fetcher( \&sub )
520              
521             With this method you can provide a subroutine that fetches URLs referenced
522             by 'link' tags. Its sole argument is the URL, which is made absolute based
523             on the HTML page's own base URL (it is assumed that this is absolute). It
524             should return C or an empty list on failure. Upon
525             success, it should return just the CSS code, if it has been decoded (and is
526             in Unicode), or, if it has not been decoded, the CSS code followed by
527             C<< decode => 1 >>. See L for details on
528             when you should or should not decode it. (Note that HTML::DOM
529             automatically
530             provides an encoding hint based on the HTML document.)
531              
532             HTML::DOM passes the result of the url fetcher to L and
533             turns
534             it into a style sheet object accessible via the link element's
535             L|HTML::DOM::Element::Link/sheet> method.
536              
537             =cut
538              
539             sub css_url_fetcher {
540 11     11 1 832 my $old = (my $self = shift)->{_HTML_DOM_cuf};
541 11 100       32 $self->{_HTML_DOM_cuf} = shift if @_;
542 11 100       41 $old||();
543             }
544              
545             =item $tree->write(...) (DOM method)
546              
547             This parses the HTML code passed to it, adding it to the end of
548             the
549             document. It assumes that its input is a normal Perl Unicode string. Like
550             L's
551             C method, it can take a coderef.
552              
553             When it is called from an an element handler (see
554             C, above), the value passed to it
555             will be inserted into the HTML code after the current element when the
556             element handler returns. (In this case a coderef won't do--maybe that will
557             be added later.)
558              
559             If the C method has been called, C will call C before
560             parsing the HTML code passed to it.
561              
562             =item $tree->writeln(...) (DOM method)
563              
564             Just like C except that it appends "\n" to its argument and does
565             not work with code refs. (Rather
566             pointless, if you ask me. :-)
567              
568             =item $tree->close() (DOM method)
569              
570             Call this method to signal to the parser that the end of the HTML code has
571             been reached. It will then parse any residual HTML that happens to be
572             buffered. It also makes the next C call C.
573              
574             =item $tree->open (DOM method)
575              
576             Deletes the HTML tree, resetting it so that it has just an element,
577             and a parser hungry for HTML code.
578              
579             =item $tree->parse_file($file)
580              
581             This method takes a file name or handle and parses the content,
582             (effectively) calling C afterwards. In the former case (a file
583             name), L will be used to detect the encoding. In the
584             latter (a file handle), you'll have to C it yourself. This could
585             be considered a bug. If you have a solution to this (how to make
586             HTML::Encoding detect an encoding from a file handle), please let me know.
587              
588             As of version 0.12, this method returns true upon success, or undef/empty
589             list on failure.
590              
591             =item $tree->charset
592              
593             This method returns the name of the character
594             set that was passed to C, or, if that was not given, that which
595             C used.
596              
597             It returns undef if C was not given a charset and if C was
598             not
599             used or was
600             passed a file handle.
601              
602             You can also set the charset by passing an argument, in which case the old
603             value is returned.
604              
605              
606             =cut
607              
608             sub parse_file {
609 5     5 1 14 my $file = $_[1];
610              
611 5         15 $_[0]->open;
612              
613             # This ‘if’ statement uses the same check that HTML::Parser uses.
614             # We are not strictly checking to see whether it’s a handle,
615             # but whether HTML::Parser would consider it one.
616 5 50 33     41 if (ref($file) || ref(\$file) eq "GLOB") {
617             (my $a = shift->{_HTML_DOM_parser})
618 0 0       0 ->parse_file($file) || return;
619 0         0 $a ->elementify;
620 0         0 return 1;
621             }
622              
623 24     24   116 no warnings 'parenthesis'; # 5.8.3 Grrr!!
  24         38  
  24         13091  
624 5 100       8 if(my $charset = $_[0]{_HTML_DOM_cs}) {
625 3 50       113 open my $fh, $file or return;
626 3         18 $charset =~ s/^(?:x-?)?mac-?/mac/i;
627 3     1   49 binmode $fh, ":encoding($charset)";
  1         5  
  1         2  
  1         7  
628             $$_{_HTML_DOM_parser}->parse_file($fh) || return,
629             $_->close
630 3   50     3463 for shift;
631 3         37 return 1;
632             }
633              
634 2 100       104 open my $fh, $file or return;
635 1         4 local $/;
636 1         17 my $contents = <$fh>;
637 1         598 require HTML::Encoding;
638 1   50     10293 my $encoding = HTML::Encoding::encoding_from_html_document(
639             $contents
640             ) || 'iso-8859-1';
641             # Since we’ve already slurped the file, we might as well
642             # avoid having HTML::Parser read it again, even if we could
643             # use binmode.
644 1         4773 require Encode;
645             $_->write(Encode::decode($encoding, $contents)), $_->close,
646             $_->{_HTML_DOM_cs} = $encoding
647 1         6 for shift;
648 1         23 return 1;
649             }
650              
651             sub charset {
652 38     38 1 1001 my $old = (my$ self = shift)->{_HTML_DOM_cs};
653 38 100       94 $self->{_HTML_DOM_cs} = shift if @_;
654 38         147 $old;
655             }
656              
657             sub write {
658 110     110 1 11964 my $self = shift;
659 110 100       184 if($$self{_HTML_DOM_buffered}) {
660             # Although we call this buffered, it’s actually not. Before
661             # version 0.040, a recursive call to ->write on the same
662             # doc object would simply record the HTML code in a buffer
663             # that was processed when the elem handler that made the
664             # inner call to ->write finished. Every elem handler would
665             # have a wrapper (created in the elem_handler sub above)
666             # that took care of this after calling the handler, by cre-
667             # ating a new, temporary, parser object that would call the
668             # start/end, etc., methods of our tree builder.
669             #
670             # This approach stops JS code like this from working (yes,
671             # there *are* websites with code like this!):
672             # document.write("")
673             # document.getElementById("img1").src="..."
674             #
675             # So, now we take care of creating a new parser immedi-
676             # ately. This does mean, however that we end up with mul-
677             # tiple parser objects floating around in the case of
678             # nested . So we have to be careful to create and
679             # delete them at the right time.
680              
681             # $$self{_HTML_DOM_buffered} actually contains a number
682             # indicating the number of nested calls to ->write.
683 7         11 my $level = $$self{_HTML_DOM_buffered};
684 7         11 local $$self{_HTML_DOM_buffered} = $level + 1;
685              
686 7         12 my($doc_elem) = $$self{_HTML_DOM_parser};
687              
688             # These handlers delegate the handling to methods of
689             # *another* HTML::Parser object.
690             my $p = $$self{_HTML_DOM_p}[$level-1] ||=
691             HTML::Parser->new(
692             start_h => [
693 5     5   15 sub { $doc_elem->start(@_) },
694             'tagname, attr, attrseq'
695             ],
696             end_h => [
697 1     1   21 sub { $doc_elem->end(@_) },
698             'tagname, text'
699             ],
700             text_h => [
701 7     7   16 sub { $doc_elem->text(@_) },
702 7   33     13 'text, is_cdata'
703             ],
704             );
705              
706 7         325 $p->unbroken_text(1); # push_content, which is called by
707             # H:TB:text, won't concatenate two
708             # text portions if the first one
709             # is a node.
710              
711 7         38 $p->parse(shift);
712              
713             # We can’t get rid of our parser at this point, as a subse-
714             # quent ->write call from the same nested level (e.g., from
715             # the same ), then we need to remove it, so we have
719             # elem_handler do that for us.
720             }
721             else {
722             my $parser
723             = $$self{_HTML_DOM_parser}
724 103   66     146 || ($self->open, $$self{_HTML_DOM_parser});
725 103         170 local $$self{_HTML_DOM_buffered} = 1;
726 103         1258 $parser->parse($_) for @_;
727             }
728 110         275 $self->_modified;
729             return # nothing;
730 110         235 }
731              
732 4     4 1 10 sub writeln { shift->write(@_,"\n") }
733              
734             sub close {
735 97     97 1 699 my $a = (my $self = shift)->{_HTML_DOM_parser};
736 97 100       232 return unless $a;
737              
738             # We can’t use eval { $a->eof } because that would catch errors
739             # that are meant to propagate (a nasty bug [the so-called
740             # ‘content—offset’ bug] was hidden because of an eval in ver-
741             # sion 0.010).
742             # return unless $a->can('eof');
743            
744 94         364 $a->eof(@_);
745 93         117 delete $$self{_HTML_DOM_parser};
746 93         245 $a->elementify;
747             return # nothing;
748 93         181 }
749              
750             sub open {
751 116     116 1 2208 (my $self = shift)->detach_content;
752              
753             # We have to use push_content instead of simply putting it there
754             # ourselves, because push_content takes care of weakening the
755             # parent (and that code doesn’t belong in this package).
756             $self->push_content(
757 116         1461 my $tb = $$self{_HTML_DOM_parser} = new HTML::DOM::Element::HTML
758             );
759              
760 116         2611 delete @$self{<_HTML_DOM_sheets _HTML_DOM_doctype>};
761              
762 116 100       280 return unless $self->{_HTML_DOM_elem_handlers};
763 14         21 for(keys %{$self->{_HTML_DOM_elem_handlers}}) {
  14         19  
764             $$tb{"_tweak_$_"} =
765 14         22 $self->{_HTML_DOM_elem_handlers}{$_}
766             }
767              
768             return # nothing;
769 14         27 }
770              
771             =back
772              
773             =head2 Other DOM Methods
774              
775             =over 4
776              
777             =cut
778              
779              
780             #-------------- DOM STUFF (CORE) ---------------- #
781              
782             =item doctype
783              
784             Returns nothing
785              
786             =item implementation
787              
788             Returns the L object.
789              
790             =item documentElement
791              
792             Returns the element.
793              
794             =item createElement ( $tag )
795              
796             =item createDocumentFragment
797              
798             =item createTextNode ( $text )
799              
800             =item createComment ( $text )
801              
802             =item createAttribute ( $name )
803              
804             Each of these creates a node of the appropriate type.
805              
806             =item createProcessingInstruction
807              
808             =item createEntityReference
809              
810             These two throw an exception.
811              
812             =for comment
813             =item createCSSStyleSheet
814             This creates a style sheet (L object).
815              
816             =item getElementsByTagName ( $name )
817              
818             C<$name> can be the name of the tag, or '*', to match all tag names. This
819             returns a node list object in scalar context, or a list in list context.
820              
821             =item importNode ( $node, $deep )
822              
823             Clones the C<$node>, setting its C attribute to the document
824             with which this method is called. If C<$deep> is true, the C<$node> will
825             be
826             cloned recursively.
827              
828             =cut
829              
830       2 1   sub doctype {} # always null
831              
832             sub implementation {
833 24     24   111 no warnings 'once';
  24         30  
  24         15158  
834 2     2 1 5 return $HTML::DOM::Implementation::it;
835             }
836              
837             sub documentElement {
838 103     103 1 727 ($_[0]->content_list)[0]
839             }
840              
841             sub createElement {
842 354     354 1 72349 my $elem = HTML::DOM::Element->new($_[1]);
843 354         1460 $elem->_set_ownerDocument(shift);
844 354         1040 $elem;
845             }
846              
847             sub createDocumentFragment {
848 9     9 1 1560 my $thing = HTML::DOM::DocumentFragment->new;
849 9         30 $thing->_set_ownerDocument(shift);
850 9         19 $thing;
851             }
852              
853             sub createTextNode {
854 606     606 1 3036 my $thing = HTML::DOM::Text->new(@_[1..$#_]);
855 606         1321 $thing->_set_ownerDocument(shift);
856 606         1580 $thing;
857             }
858              
859             sub createComment {
860 7     7 1 452 my $thing = HTML::DOM::Comment->new(@_[1..$#_]);
861 7         35 $thing->_set_ownerDocument(shift);
862 7         28 $thing;
863             }
864              
865             sub createCDATASection {
866 1     1 0 415 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
867             'The HTML DOM does not support CDATA sections' );
868             }
869              
870             sub createProcessingInstruction {
871 1     1 1 310 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
872             'The HTML DOM does not support processing instructions' );
873             }
874              
875             sub createAttribute {
876 22     22 1 2292 my $thing = HTML::DOM::Attr->new(@_[1..$#_]);
877 22         70 $thing->_set_ownerDocument(shift);
878 22         54 $thing;
879             }
880              
881             sub createEntityReference {
882 1     1 1 380 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
883             'The HTML DOM does not support entity references' );
884             }
885              
886             #sub createCSSStyleSheet {
887             # shift;
888             # require CSS'DOM;
889             # ~~~
890             #}
891              
892             sub getElementsByTagName {
893 17     17 1 520 my($self,$tagname) = @_;
894             #warn "You didn't give me a tag name." if !defined $tagname;
895 17 100       62 if (wantarray) {
896 4 100       30 return $tagname eq '*'
897             ? grep tag $_ !~ /^~/, $self->descendants
898             : $self->find($tagname);
899             }
900             else {
901             my $list = HTML::DOM::NodeList::Magic->new(
902             $tagname eq '*'
903 2     2   5 ? sub { grep tag $_ !~ /^~/, $self->descendants }
904 17     17   69 : sub { $self->find($tagname) }
905 13 100       138 );
906 13         38 $self-> _register_magic_node_list($list);
907 13         144 $list;
908             }
909             }
910              
911             sub importNode {
912 12     12 1 313 my ($self, $node, $deep) = @_;
913 12 100       45 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
914             'Documents cannot be imported.' )
915             if $node->nodeType ==DOCUMENT_NODE;
916 10         36 (my $clown = $node->cloneNode($deep))
917             ->_set_ownerDocument($self);
918 10 100       42 if($clown->can('descendants')) { # otherwise it’s an Attr, so this
919 8         15 for($clown->descendants) { # isn’t necessary
920 4         6 delete $_->{_HTML_DOM_owner};
921             }}
922 10         20 $clown;
923             }
924              
925             #-------------- DOM STUFF (HTML) ---------------- #
926              
927             =item alinkColor
928              
929             =item background
930              
931             =item bgColor
932              
933             =item fgColor
934              
935             =item linkColor
936              
937             =item vlinkColor
938              
939             These six methods return (optionally set) the corresponding attributes of
940             the body element. Note that most of the names do not map directly to the
941             names of
942             the attributes. C refers to the C attribute. Those that
943             end
944             with 'linkColor' refer to the attributes of the same name but without the
945             'Color' on the end.
946              
947             =cut
948              
949 6   100 6 1 13 sub alinkColor { (shift->body||return "")->aLink (@_) }
950 6   100 6 1 638 sub background { (shift->body||return "")->background(@_) }
951 6   100 6 1 583 sub bgColor { (shift->body||return "")->bgColor (@_) }
952 6   100 6 1 580 sub fgColor { (shift->body||return "")->text (@_) }
953 6   100 6 1 582 sub linkColor { (shift->body||return "")->link (@_) }
954 6   100 6 1 598 sub vlinkColor { (shift->body||return "")->vLink (@_) }
955              
956             =item title
957              
958             Returns (or optionally sets) the title of the page.
959              
960             =item referrer
961              
962             Returns the page's referrer.
963              
964             =item domain
965              
966             Returns the domain name portion of the document's URL.
967              
968             =item URL
969              
970             Returns the document's URL.
971              
972             =item body
973              
974             Returns the body element, or the outermost frame set if the document has
975             frames. You can set the body by passing an element as an argument, in
976             which
977             case the old body element is returned.
978              
979             =item images
980              
981             =item applets
982              
983             =item links
984              
985             =item forms
986              
987             =item anchors
988              
989             These five methods each return a list of the appropriate elements in list
990             context, or an L object in scalar context. In this
991             latter case, the object will update automatically when the document is
992             modified.
993              
994             In the case of C you can access those by using the HTML::DOM object
995             itself as a hash. I.e., you can write C<< $doc->{f} >> instead of
996             S<< C<< $doc->forms->{f} >> >>.
997              
998             =for comment
999             # ~~~ Why on earth did I ever put this in the docs?!
1000             B I need to make these methods cache the HTML collection objects
1001             that they create. Once I've done this, I can make list context use those
1002             objects, as well as scalar context.
1003              
1004             =item cookie
1005              
1006             This returns a string containing the document's cookies (the format may
1007             still change). If you pass an
1008             argument, it
1009             will set a cookie as well. Both Netscape-style and RFC2965-style cookie
1010             headers are supported.
1011              
1012             =cut
1013              
1014             sub title {
1015 12     12 1 369 my $doc = shift;
1016 12 100       35 if(my $title_elem = $doc->find('title')) {
1017 8         25 $title_elem->text(@_);
1018             }
1019             else {
1020 4 100       12 return "" unless @_;
1021 3   66     9 ( $doc->find('head')
1022             || ( $doc->find('html')
1023             || $doc->appendChild($doc->createElement('html'))
1024             )->appendChild($doc->createElement('head'))
1025             )->appendChild(
1026             my $t = $doc->createElement('title')
1027             );
1028 3         12 $t->text(@_);
1029 3         14 return "";
1030             }
1031             }
1032              
1033             sub referrer {
1034 5     5 1 904 my $referrer = shift->{_HTML_DOM_referrer};
1035 5 50       24 defined $referrer ? $referrer : ();
1036             }
1037              
1038 24     24   110 sub domain { no strict;
  24         27  
  24         14962  
1039 2     2 1 4 my $doc = shift;
1040 2         2 host {ref $doc->{_HTML_DOM_url} ? $doc->{_HTML_DOM_url}
1041 2 100       4 : ($doc->{_HTML_DOM_url} = URI->new($doc->{_HTML_DOM_url}))};
1042             }
1043              
1044             sub URL {
1045 152     152 1 1410 my $url = shift->{_HTML_DOM_url};
1046 152 100       615 defined $url ? "$url" : undef;
1047             }
1048              
1049             sub body { # ~~~ this needs to return the outermost frameset element if
1050             # there is one (if the frameset is always the second child
1051             # of , then it already does).
1052 78     78 1 1836 my $body = ($_[0]->documentElement->content_list)[1];
1053 78 100 66     332 if (!$body || $body->tag !~ /^(?:body|frameset)\z/) {
1054 24         41 $body = $_[0]->find('body','frameset');
1055             }
1056 78 100       146 if(@_>1) {
1057 2         5 my $doc_elem = $_[0]->documentElement;
1058             # I'm using the replaceChild rather than replace_with,
1059             # despite the former's convoluted syntax, since the former
1060             # has the appropriate error-checking code (or will), and
1061             # also because it triggers mutation events.
1062 2         10 $doc_elem->replaceChild($_[1],$body)
1063             }
1064             else {
1065 76         391 $body
1066             }
1067             }
1068              
1069             sub images {
1070 2     2 1 8 my $self = shift;
1071 2 100       5 if (wantarray) {
1072 1         7 return grep tag $_ eq 'img', $self->descendants;
1073             }
1074             else {
1075             my $collection = HTML::DOM::Collection->new(
1076             my $list = HTML::DOM::NodeList::Magic->new(
1077 1     1   3 sub { grep tag $_ eq 'img', $self->descendants }
1078 1         12 ));
1079 1         4 $self-> _register_magic_node_list($list);
1080 1         28 $collection;
1081             }
1082             }
1083              
1084             sub applets {
1085 2     2 1 3 my $self = shift;
1086 2 100       6 if (wantarray) {
1087 1         4 return grep $_->tag =~ /^(?:objec|apple)t\z/,
1088             $self->descendants;
1089             }
1090             else {
1091             my $collection = HTML::DOM::Collection->new(
1092             my $list = HTML::DOM::NodeList::Magic->new(
1093 1     1   4 sub { grep $_->tag =~ /^(?:objec|apple)t\z/,
1094             $self->descendants }
1095 1         6 ));
1096 1         3 $self-> _register_magic_node_list($list);
1097 1         3 $collection;
1098             }
1099             }
1100              
1101             sub links {
1102 4     4 1 444 my $self = shift;
1103 4 100       11 if (wantarray) {
1104             return grep {
1105 1         4 my $tag = tag $_;
  38         54  
1106 38 100 100     109 $tag eq 'area' || $tag eq 'a'
1107             && defined $_->attr('href')
1108             } $self->descendants;
1109             }
1110             else {
1111             my $collection = HTML::DOM::Collection->new(
1112             my $list = HTML::DOM::NodeList::Magic->new(
1113             sub { grep {
1114 3     3   22 my $tag = tag $_;
  90         152  
1115 90 100 100     278 $tag eq 'area' || $tag eq 'a'
1116             && defined $_->attr('href')
1117             } $self->descendants }
1118 3         30 ));
1119 3         11 $self-> _register_magic_node_list($list);
1120 3         30 $collection;
1121             }
1122             }
1123              
1124             sub forms {
1125 39     39 1 549 my $self = shift;
1126 39 100       80 if (wantarray) {
1127 17         57 return grep tag $_ eq 'form', $self->descendants;
1128             }
1129             else {
1130             my $collection = HTML::DOM::Collection->new(
1131             my $list = HTML::DOM::NodeList::Magic->new(
1132 22     22   84 sub { grep tag $_ eq 'form', $self->descendants }
1133 22         148 ));
1134 22         52 $self-> _register_magic_node_list($list);
1135 22         208 $collection;
1136             }
1137             }
1138              
1139             sub anchors {
1140 2     2 1 17 my $self = shift;
1141 2 100       7 if (wantarray) {
1142 1   100     5 return grep tag $_ eq 'a' && defined $_->attr('name'),
1143             $self->descendants;
1144             }
1145             else {
1146             my $collection = HTML::DOM::Collection->new(
1147             my $list = HTML::DOM::NodeList::Magic->new(
1148 1   100 1   4 sub { grep tag $_ eq 'a' && defined $_->attr('name'),
1149             $self->descendants }
1150 1         47 ));
1151 1         4 $self-> _register_magic_node_list($list);
1152 1         3 $collection;
1153             }
1154             }
1155              
1156              
1157             sub cookie {
1158 8     8 1 481 my $self = shift;
1159 8 100       16 return '' unless defined (my $jar = $self->{_HTML_DOM_jar});
1160 6         10 my $return;
1161 6 50       12 if (defined wantarray) {
1162             # Yes, this is nuts (getting HTTP::Cookies to join the cookies, and
1163             # splitting them, filtering them, and joining them again[!]), but
1164             # &HTTP::Cookies::add_cookie_header is long and complicated, and I
1165             # don't want to replicate it here.
1166 24     24   103 no warnings 'uninitialized';
  24         31  
  24         7763  
1167 6         7 my $reqclone = $self->{_HTML_DOM_response}->request->clone;
1168             # Yes this is a bit strange, but we don’t want to put
1169             # ‘use HTTP::Header 1.59’ in this file, as it would mean loading the
1170             # module even for people who are not using this feature or who are
1171             # duck-typing.
1172 6 50 66     640 if (!$reqclone->can('header_field_names')
1173 0         0 && $reqclone->isa("HTTP::Headers")) { VERSION HTTP::Headers:: 1.59 }
1174 6         16 for($reqclone->header_field_names) {
1175 2 50       50 /cookie/i and remove_header $reqclone $_;
1176             }
1177 6         104 $return = join ';', grep !/\$/,
1178             $jar->add_cookie_header(
1179             $reqclone
1180             )-> header ('Cookie')
1181             # Pieces of this regexp were stolen from HTTP::Headers::Util:
1182             =~ /\G\s* # initial whitespace
1183             (
1184             [^\s=;,]+ # name
1185             \s*=\s* # =
1186             (?:
1187             \"(?:[^\"\\]*(?:\\.[^\"\\]*)*)\" # quoted value
1188             |
1189             [^;,\s]* # unquoted value
1190             )
1191             )
1192             \s*;?
1193             /xg;
1194             }
1195 6 100       1559 if (@_) {
1196 3 50       8 return unless defined $self->{_HTML_DOM_response};
1197 3         15 require HTTP::Headers::Util;
1198             (undef,undef, my%split) =
1199 3         4 @{(HTTP::Headers::Util::split_header_words($_[0]))[0]};
  3         8  
1200 3         226 my $rfc;
1201 3         6 for(keys %split){
1202             # I *hope* this always works! (NS cookies should have no version.)
1203 6 100       14 ++ $rfc, last if lc $_ eq 'version';
1204             }
1205             (my $clone = $self->{_HTML_DOM_response}->clone)
1206 3         6 ->remove_header(qw/ Set-Cookie Set-Cookie2 /);
1207 3         567 $clone->header('Set-Cookie' . 2 x!! $rfc => $_[0]);
1208 3         129 $jar->extract_cookies($clone);
1209             }
1210 6 100       894 $return||'';
1211             }
1212              
1213             =item getElementById
1214              
1215             =item getElementsByName
1216              
1217             =item getElementsByClassName
1218              
1219             These three do what their names imply. The last two
1220             will return a list in list context, or a node list
1221             object in scalar context. Calling them in list
1222             context is probably more efficient.
1223              
1224             =cut
1225              
1226             sub getElementById {
1227 163     163 1 7505 my(@pile) = grep ref($_), @{shift->{'_content'}};
  163         288  
1228 163         203 my $id = shift;
1229 163         114 my $this;
1230 163         270 while(@pile) {
1231 24     24   115 no warnings 'uninitialized';
  24         25  
  24         11452  
1232 2501         1900 $this = shift @pile;
1233 2501 100       3524 $this->id eq $id and return $this;
1234 2339         3419 unshift @pile, grep ref($_), $this->content_list;
1235             }
1236 1         4 return;
1237             }
1238              
1239             sub getElementsByName {
1240 3     3 1 8 my($self,$name) = @_;
1241 3 100       8 if (wantarray) {
1242 2         31 return $self->look_down(name => "$name");
1243             }
1244             else {
1245             my $list = HTML::DOM::NodeList::Magic->new(
1246 1     1   2 sub { $self->look_down(name => "$name"); }
1247 1         7 );
1248 1         3 $self-> _register_magic_node_list($list);
1249 1         5 $list;
1250             }
1251             }
1252              
1253             sub getElementsByClassName {
1254 9     9 1 556 splice @_, 2, @_, 1; # Remove extra elements; add a true third elem
1255 9         32 goto &HTML'DOM'Element'_getElementsByClassName;
1256             }
1257              
1258             # ---------- DocumentEvent interface -------------- #
1259              
1260             =item createEvent ( $category )
1261              
1262             Creates a new event object, believe it or not.
1263              
1264             The C<$category> is the DOM event category, which determines what type of
1265             event object will be returned. The currently supported event categories
1266             are MouseEvents, UIEvents, HTMLEvents and MutationEvents.
1267              
1268             You can omit the C<$category> to create an instance of the event base class
1269             (not officially part of the DOM).
1270              
1271             =cut
1272              
1273             sub createEvent {
1274 267     267 1 3117 require HTML'DOM'Event;
1275 267   100     899 HTML'DOM'Event'create_event($_[1]||'');
1276             }
1277              
1278             # ---------- DocumentView interface -------------- #
1279              
1280             =item defaultView
1281              
1282             Returns the L object associated with the document.
1283              
1284             There is no such object by default; you have to put one there yourself:
1285              
1286             Although it is supposed to be read-only according to the DOM, you can set
1287             this attribute by passing an argument to it. It I still marked as
1288             read-only in
1289             L|HTML::DOM::Interface>.
1290              
1291             If you do set it, it is recommended that the object be a subclass of
1292             L.
1293              
1294             This attribute holds a weak reference to the object.
1295              
1296             =cut
1297              
1298             sub defaultView {
1299 1886     1886 1 1609 my $self = shift;
1300 1886         2347 my $old = $self->{_HTML_DOM_view};
1301 1886 100       3190 if(@_) {
1302 4         9 weaken($self->{_HTML_DOM_view} = shift);
1303             }
1304 1886 100       6344 return defined $old ? $old : ();
1305             }
1306              
1307             # ---------- DocumentStyle interface -------------- #
1308              
1309             =item styleSheets
1310              
1311             Returns a L of the document's style sheets, or a
1312             simple list in list context.
1313              
1314             =cut
1315              
1316             sub styleSheets {
1317 25     25 1 412 my $doc = shift;
1318             my $ret = (
1319             $doc->{_HTML_DOM_sheets} or
1320             $doc->{_HTML_DOM_sheets} = (
1321             require CSS::DOM::StyleSheetList,
1322             new CSS::DOM::StyleSheetList
1323             ),
1324             $doc->_populate_sheet_list,
1325             $doc->{_HTML_DOM_sheets}
1326 25   66     46 );
1327 25 100       91 wantarray ? @$ret : $ret;
1328             }
1329              
1330             =item innerHTML
1331              
1332             Serialises and returns the HTML document. If you pass an argument, it will
1333             set the contents of the document via C, C and C,
1334             returning a serialisation of the old contents.
1335              
1336             =cut
1337              
1338             sub innerHTML {
1339 25     25 1 775 my $self = shift;
1340 25         22 my $old;
1341 25 50 100     73 $old = join '' , $self->{_HTML_DOM_doctype}||'',
    100          
1342             map
1343             HTML'DOM'Element'_html_element_adds_newline
1344             ? substr((
1345             as_HTML $_ (undef)x2,{}
1346             ), 0, -1)
1347             : $_->as_HTML((undef)x2,{}),
1348             $self->content_list
1349             if defined wantarray;
1350 25 100       65 if(@_){
1351 13         29 $self->open();
1352 13         37 $self->write(shift);
1353 13         43 $self->close();
1354             }
1355             $old
1356 25         103 }
1357              
1358              
1359             =item location
1360              
1361             =item set_location_object (non-DOM)
1362              
1363             C returns the location object, if you've put one there with
1364             C. HTML::DOM doesn't actually implement such an object
1365             itself, but provides the appropriate magic to make
1366             C<< $doc->location($foo) >> translate into
1367             C<< $doc->location->href($foo) >>.
1368              
1369             BTW, the location object had better be true when used as a boolean, or
1370             HTML::DOM will think it doesn't exist.
1371              
1372             =cut
1373              
1374             sub location {
1375 3     3 1 7 my $self = shift;
1376 3 100 50     9 @_ and ($$self{_HTML_DOM_loc}||die "Can't assign to location"
1377             ." without a location object")->href(@_);
1378             $$self{_HTML_DOM_loc}||()
1379 3 100       6 }
1380              
1381             sub set_location_object {
1382 1     1 1 3 $_[0]{_HTML_DOM_loc} = $_[1];
1383             }
1384              
1385              
1386             =item lastModified
1387              
1388             This method returns the document's modification date as gleaned from the
1389             response object passed to the constructor, in MM/DD/YYYY HH:MM:SS format.
1390              
1391             If there is no modification date, an empty string is returned, but this
1392             may change in the future.
1393              
1394             =begin comment
1395              
1396             When there is no modification date, the return value is different in every
1397             browser.
1398             NS 2-4 and Opera 9 have the epoch (in GMT format).
1399             Firefox 3 has the time the page was loaded.
1400             Safari 4 has an empty string (it uses GMT format when there is a mod time).
1401             IE, 6-8 the only one to comply with HTML 5, has the current time; but HTML
1402             5 is illogical, since it makes no sense for the modification time to keep
1403             ticking away.
1404              
1405             I’ve opted to use the empty string for now, since we can’t *really* find
1406             out the modification time--only what the server *says* it is. And if the
1407             server doesn’t say, it’s no use pretending that it did say it.
1408              
1409             =end comment
1410              
1411             =cut
1412              
1413             sub lastModified {
1414 4 100 100 4 1 445 my $time = ($_[0]{_HTML_DOM_response} || return '')->last_modified
1415             or return '';
1416 2         700 require Date'Format;
1417 2         3618 Date'Format'time2str("%d/%m/%Y %X", $time);
1418             }
1419              
1420              
1421             =back
1422              
1423             =cut
1424              
1425              
1426             # ---------- OVERRIDDEN NODE & EVENT TARGET METHODS -------------- #
1427              
1428       44 1   sub ownerDocument {} # empty list
1429 1     1 1 602 sub nodeName { '#document' }
1430 24     24   109 { no warnings 'once'; *nodeType = \& DOCUMENT_NODE; }
  24         30  
  24         3943  
1431              
1432             =head2 Other (Non-DOM) Methods
1433              
1434             (See also L, below.)
1435              
1436             =over 4
1437              
1438             =item $tree->base
1439              
1440             Returns the base URL of the page; either from a tag, from
1441             the response object passed to C, or the
1442             URL passed to C.
1443              
1444             =cut
1445              
1446             sub base {
1447 159     159 1 194 my $doc = shift;
1448 159 100       636 if(
    100          
1449             my $base_elem = $doc->look_down(_tag => 'base', href => qr)(?:\)))
1450             ){
1451 10         31 return ''.$base_elem->attr('href');
1452             }
1453             elsif (my $r = $$doc{_HTML_DOM_response}) {
1454 2         3 my $base;
1455 2 100 66     26 ($base) = $r->header('Content-Base')
1456             or ($base) = $r->header('Content-Location')
1457             or $base = $r->header('Base');
1458             # URI does not document $URI::scheme_re, but HTTP::Response
1459             # (which is in a separate distribution) uses it. It seems
1460             # unlikely that it will go away in future URI versions, as
1461             # that would break existing versions of HTTP::Response.
1462 2 100 66     225 if ($base && $base =~ /^$URI::scheme_re:/o) {
1463             # already absolute
1464 1         8 return $base;
1465             }
1466 1         3 my $req = request $r;
1467 1 50       10 my $uri = $req ? uri $req : $doc->URL;
1468 1 50       35 return undef unless $uri;
1469             # Work around URI bug.
1470 1 50 33     13 if (!defined $base && $uri =~ /^[Dd][Aa][Tt][Aa]:/) {
1471 1         10 return $uri;
1472             }
1473 24     24   96 no warnings 'uninitialized';
  24         35  
  24         11268  
1474 0         0 ''.new_abs URI $base,$uri;
1475             }
1476             else {
1477 147         246 $doc->URL
1478             }
1479             }
1480              
1481             =item $tree->magic_forms
1482              
1483             This is mainly for internal use. It returns a boolean indicating whether
1484             the parser needed to associate formies with a form that did not contain
1485             them. This happens when a closing tag is missing and the form is
1486             closed implicitly, but a formie is encountered later.
1487              
1488             =cut
1489              
1490 1580 50   1580 1 3726 sub magic_forms { @_ and ++$_[0]{_HTML_DOM_mg_f}; $_[0]{_HTML_DOM_mg_f} }
  1580         2131  
1491              
1492             =back
1493              
1494             =head1 HASH ACCESS
1495              
1496             You can use an HTML::DOM object as a hash ref to access it's form elements
1497             by name. So C<< $doc->{yayaya} >> is short for
1498             S<< C<< $doc->forms->{yayaya} >> >>.
1499              
1500             =head1 EVENT HANDLING
1501              
1502             HTML::DOM supports both the DOM Level 2 event model and the HTML 4 event
1503             model.
1504              
1505             Throughout this documentation, we make use of HTML 5's distinction between
1506             handlers and listeners: An event handler is the result of an HTML element
1507             beginning with 'on', e.g. onsubmit. These are also accessible via the DOM.
1508             (We also use the word 'handler' in other contexts, such as the 'default
1509             event handler'.)
1510             Event listeners are registered solely with the C method
1511             and can be removed with C.
1512              
1513             HTML::DOM accepts as an event handler a coderef, an object with a
1514             C method, or an object with C<&{}> overloading. If the
1515             C method is present, it is called with the current event
1516             target as the first argument and the event object as the second.
1517             This is to allow for objects that wrap JavaScript functions (which must be called with the event target as the B value).
1518              
1519             An event listener is a coderef, an object with a C
1520             method or an object with C<&{}> overloading. HTML::DOM does not implement
1521             any classes that provide a C method, but will support any
1522             object that has one.
1523              
1524             Listeners and handlers differ in one important aspect. A listener has to
1525             call C on the event object to cancel the default action. A
1526             handler simply returns a defined false value (except for mouseover events,
1527             which must return a true value to cancel the default).
1528              
1529             =head2 Default Actions
1530              
1531             Default actions that HTML::DOM is capable of handling internally (such as
1532             triggering a DOMActivate event when an element is clicked, and triggering a
1533             form's submit event when the submit button is activated) are dealt with
1534             automatically. You don't have to worry about those. For others, read
1535             on....
1536              
1537             To specify the default actions associated with an event, provide a
1538             subroutine (in this case, it not being part of the DOM, you can't use an
1539             object with a C method) via the C
1540             and
1541             C methods.
1542              
1543             With the former, you can specify the
1544             default action to be taken when a particular type of event occurs. The
1545             currently supported types are:
1546              
1547             submit when a form is submitted
1548             link called when a link is activated (DOMActivate event)
1549              
1550             Pass the type of event as the first argument and a code ref as the second
1551             argument. When the code ref is called, its sole argument will
1552             be the event object. For instance:
1553              
1554             $dom_tree->default_event_handler_for( link => sub {
1555             my $event = shift;
1556             go_to( $event->target->href );
1557             });
1558             sub go_to { ... }
1559              
1560             C with just one argument returns the
1561             currently
1562             assigned coderef. With two arguments it returns the old one after
1563             assigning the new one.
1564              
1565             Use C (without the C<_for>) to specify a fallback
1566             subroutine that will be used for events not in the list above, and for
1567             events in the list above that do not have subroutines assigned to them.
1568             Without any arguments it will return the currently
1569             assigned coderef. With an argument it will return the old one after
1570             assigning the new one.
1571              
1572             =head2 Dispatching Events
1573              
1574             HTML::DOM::Node's C method triggers the appropriate event
1575             listeners, but does B call any default actions associated with it.
1576             The return value is a boolean that indicates whether the default action
1577             should be taken.
1578              
1579             H:D:Node's C method will trigger the event for real. It will
1580             call C and, provided it returns true, will call the default
1581             event handler.
1582              
1583             =head2 HTML Event Attributes
1584              
1585             The C can be used to assign a coderef that will turn
1586             text assigned to an event attribute (e.g., C) into an event
1587             handler. The
1588             arguments to the routine will be (0) the element, (1) the name (aka
1589             type) of
1590             the event (without the initial 'on'), (2) the value of the attribute and
1591             (3) the offset within the source of the attribute's value. (Actually, if
1592             the value is within quotes, it is the offset of the first quotation mark.
1593             Also, it will be C for generated HTML [source code passed to the
1594             C method by an element handler].)
1595             As
1596             with C, you
1597             can replace an existing handler with a new one, in which case the old
1598             handler is returned. If you call this method without arguments, it returns
1599             the current handler. Here is an example of its use, that assumes that
1600             handlers are Perl code:
1601              
1602             $dom_tree->event_attr_handler(sub {
1603             my($elem, $name, $code, $offset) = @_;
1604             my $sub = eval "sub { $code }";
1605             return sub {
1606             local *_ = \$elem;
1607             &$sub;
1608             };
1609             });
1610              
1611             The event attribute handler will be called whenever an element attribute
1612             whose name
1613             begins with 'on' (case-tolerant) is modified. (For efficiency's sake, I may
1614             change it to call the event attribute handler only when the event is
1615             triggered, so it is not called unnecessarily.)
1616              
1617             =head2 When an Event Handler Dies
1618              
1619             Use C to assign a coderef that will be called whenever an
1620             event listener (or handler) raises an error. The error will be contained in
1621             C<$@>.
1622              
1623             =head2 Other Event-Related Methods
1624              
1625             =over
1626              
1627             =item $tree->event_parent
1628              
1629             =item $tree->event_parent( $new_val )
1630              
1631             This method lets you provide an object that is added to the top of the
1632             event dispatch chain. E.g., if you want the view object (the value of
1633             C, aka the window) to have event handlers called before the
1634             document in the capture phase, and after it in the bubbling phase, you can
1635             set it like this (see also L, above):
1636              
1637             $tree->event_parent( $tree->defaultView );
1638              
1639             This holds a weak reference.
1640              
1641             =item $tree->event_listeners_enabled
1642              
1643             =item $tree->event_listeners_enabled( $new_val )
1644              
1645             This attribute, which is true by default, can be used to disable event
1646             handlers and listeners. (Default event handlers [see above] still run,
1647             though.)
1648              
1649             =back
1650              
1651             =cut
1652              
1653              
1654             # ---------- NON-DOM EVENT METHODS -------------- #
1655              
1656             sub event_attr_handler {
1657 21     21 0 402 my $old = $_[0]->{_HTML_DOM_event_attr_handler};
1658 21 100       50 $_[0]->{_HTML_DOM_event_attr_handler} = $_[1] if @_ > 1;
1659 21         67 $old;
1660             }
1661             sub default_event_handler {
1662 1886     1886 0 2066 my $old = $_[0]->{_HTML_DOM_default_event_handler};
1663 1886 100       3472 $_[0]->{_HTML_DOM_default_event_handler} = $_[1] if @_ > 1;
1664 1886         3570 $old;
1665             }
1666             sub default_event_handler_for {
1667 222     222 0 339 my $old = $_[0]->{_HTML_DOM_dehf}{$_[1]};
1668 222 100       481 $_[0]->{_HTML_DOM_dehf}{$_[1]} = $_[2] if @_ > 2;
1669 222         824 $old;
1670             }
1671             sub error_handler {
1672 1895     1895 1 2590 my $old = $_[0]->{_HTML_DOM_error_handler};
1673 1895 100       3826 $_[0]->{_HTML_DOM_error_handler} = $_[1] if @_ > 1;
1674 1895         4963 $old;
1675             }
1676              
1677             sub event_parent {
1678 677     677 1 779 my $old = (my $self = shift) ->{_HTML_DOM_event_parent};
1679 677 100       1248 weaken($self->{_HTML_DOM_event_parent} = shift) if @_;
1680 677         2238 $old
1681             }
1682              
1683             sub event_listeners_enabled {
1684 1891     1891 1 3009 my $old = (my $Self = shift)->{_HTML_DOM_doevents};
1685 1891 100       3428 @_ and $$Self{_HTML_DOM_doevents} = !!shift;
1686 1891 100       5922 defined $old ? $old : 1; # true by default
1687             }
1688              
1689              
1690             # ---------- NODE AND SHEET LIST HELPER METHODS -------------- #
1691              
1692             sub _modified { # tells all it's magic nodelists that they're stale
1693             # and also rewrites the style sheet list if present
1694 376     376   599 my $list = $_[0]{_HTML_DOM_node_lists};
1695 376         439 my $list_is_stale;
1696 376         601 for (@$list) {
1697 276 100       1950 defined() ? $_->_you_are_stale : ++$list_is_stale
1698             }
1699 376 100       613 if($list_is_stale) {
1700 48         138 @$list = grep defined, @$list;
1701 48         129 weaken $_ for @$list;
1702             }
1703            
1704 376         645 $_[0]->_populate_sheet_list
1705             }
1706              
1707             sub _populate_sheet_list { # called both by styleSheets and _modified
1708 413   100 413   470 for($_[0]->{_HTML_DOM_sheets}||return) {
1709 12         53 @$_ = map sheet $_,
1710             $_[0]->look_down(_tag => qr/^(?:link|style)\z/);
1711             }
1712             }
1713              
1714             sub _register_magic_node_list { # adds the node list to the list of magic
1715             # node lists that get notified automatic-
1716             # ally whenever the doc structure changes
1717 204     204   175 push @{$_[0]{_HTML_DOM_node_lists}}, $_[1];
  204         340  
1718 204         318 weaken $_[0]{_HTML_DOM_node_lists}[-1];
1719             }
1720              
1721              
1722              
1723             1;
1724             __END__