File Coverage

blib/lib/HTML/DOM.pm
Criterion Covered Total %
statement 428 432 99.0
branch 172 188 91.4
condition 80 100 80.0
subroutine 106 106 100.0
pod 55 59 93.2
total 841 885 95.0


line stmt bran cond sub pod time code
1             package HTML::DOM;
2              
3             # If you are looking at the source code (which you are obviously doing
4             # if you are reading this), note that '# ~~~' is my way of marking
5             # something to be done still (except in this sentence).
6              
7              
8 24     24   179837 use 5.008003;
  24         55  
9              
10 24     24   85 use strict;
  24         26  
  24         395  
11 24     24   67 use warnings;
  24         29  
  24         492  
12              
13 24     24   67 use Carp 'croak';
  24         32  
  24         911  
14 24     24   9991 use HTML::DOM::Element;
  24         48  
  24         1012  
15 24     24   101 use HTML::DOM::Exception 'NOT_SUPPORTED_ERR';
  24         26  
  24         802  
16 24     24   77 use HTML::DOM::Node 'DOCUMENT_NODE';
  24         24  
  24         651  
17 24     24   73 use Scalar::Util 'weaken';
  24         19  
  24         683  
18 24     24   71 use URI;
  24         28  
  24         2579  
19              
20             our $VERSION = '0.056';
21             our @ISA = 'HTML::DOM::Node';
22              
23             require HTML::DOM::Collection;
24             require HTML::DOM::Comment;
25             require HTML::DOM::DocumentFragment;
26             require HTML::DOM::Implementation;
27             require HTML::DOM::NodeList::Magic;
28             require HTML::DOM::Text;
29             require HTML::Tagset;
30             require HTML::DOM::_TreeBuilder;
31              
32             use overload fallback => 1,
33             '%{}' => sub {
34 20284     20284   23495 my $self = shift;
35             #return $self; # for debugging
36 20284 100 100     80485 $self->isa(scalar caller) || caller->isa('HTML::DOM::_TreeBuilder')
37             and return $self;
38 3         11 $self->forms;
39 24     24   85 };
  24         25  
  24         125  
40              
41              
42             =head1 NAME
43              
44             HTML::DOM - A Perl implementation of the HTML Document Object Model
45              
46             =head1 VERSION
47              
48             Version 0.056 (alpha)
49              
50             B This module is still at an experimental stage. The API is
51             subject to change without
52             notice.
53              
54             =head1 SYNOPSIS
55              
56             use HTML::DOM;
57            
58             my $dom_tree = new HTML::DOM; # empty tree
59             $dom_tree->write($source_code);
60             $dom_tree->close;
61            
62             my $other_dom_tree = new HTML::DOM;
63             $other_dom_tree->parse_file($filename);
64            
65             $dom_tree->getElementsByTagName('body')->[0]->appendChild(
66             $dom_tree->createElement('input')
67             );
68            
69             print $dom_tree->innerHTML, "\n";
70              
71             my $text = $dom_tree->createTextNode('text');
72             $text->data; # get attribute
73             $text->data('new value'); # set attribute
74            
75             =head1 DESCRIPTION
76              
77             This module implements the HTML Document Object Model by extending the
78             HTML::Tree modules. The HTML::DOM class serves both as an HTML parser and
79             as the document class.
80              
81             The following DOM modules are currently supported:
82              
83             Feature Version (aka level)
84             ------- -------------------
85             HTML 2.0
86             Core 2.0
87             Events 2.0
88             UIEvents 2.0
89             MouseEvents 2.0
90             MutationEvents 2.0
91             HTMLEvents 2.0
92             StyleSheets 2.0
93             CSS 2.0 (partially)
94             CSS2 2.0
95             Views 2.0
96              
97             StyleSheets, CSS and CSS2 are actually provided by L. This list
98             corresponds to CSS::DOM versions 0.02 to 0.14.
99              
100             =for comment
101             Level 2 interfaces not yet included: Range, Traversal
102              
103             =head1 METHODS
104              
105             =head2 Construction and Parsing
106              
107             =over 4
108              
109             =item $tree = new HTML::DOM %options;
110              
111             This class method constructs and returns a new HTML::DOM object. The
112             C<%options>, which are all optional, are as follows:
113              
114             =over 4
115              
116             =item url
117              
118             The value that the C method will return. This value is also used by
119             the C method.
120              
121             =item referrer
122              
123             The value that the C method will return
124              
125             =item response
126              
127             An HTTP::Response object. This will be used for information needed for
128             writing cookies. It is expected to have a reference to a request object
129             (accessible via its C method--see L). Passing a
130             parameter to the 'cookie' method will be a no-op
131             without this.
132              
133             =item weaken_response
134              
135             If this is passed a true value, then the HTML::DOM object will hold a weak
136             reference to the response.
137              
138             =item cookie_jar
139              
140             An HTTP::Cookies object. As with C, if you omit this, arguments
141             passed to the
142             C method will be ignored.
143              
144             =item charset
145              
146             The original character set of the document. This does not affect parsing
147             via the C method (which always assumes Unicode). C will
148             use this, if specified, or L otherwise.
149             L's C method uses this to encode form data
150             unless the form has a valid 'accept-charset' attribute.
151              
152             =back
153              
154             If C and C are omitted, they can be inferred from
155             C.
156              
157             =cut
158              
159             {
160             # This HTML::DOM::Element::HTML package represents the
161             # documentElement. It inherits from
162             # HTML::DOM::_TreeBuilder and acts
163             # as the parser. It is also used as a parser for innerHTML.
164              
165             # Note for potential developers: You can’t refer to ->parent in
166             # this package and expect it to provide the document, because
167             # that’s not the case with innerHTML. Use ->ownerDocument.
168             # Use ->parent only to distinguish between innerHTML and
169             # the regular parser.
170              
171             # Concerning magic associations between forms and fields: To cope
172             # with bad markup, an implicitly closed form (with no end tag) is
173             # associated with any form fields that occur after that are not
174             # inside any form. So when a start tag for a form is encountered,
175             # we make that the ‘current form’, by pushing it on to
176             # @{ $$self{_HTML_DOM_cf} }. When the element is closed, if it
177             # is closed by an end tag, we simply pop it off the cf array. If
178             # it is implicitly closed we pop it off and also make it the
179             # ‘magic form’ (_HTML_DOM_mg_f). When we encounter a form field,
180             # we give it a magic association with the form if the cf
181             # stack is empty.
182              
183              
184             package HTML::DOM::Element::HTML;
185             our @ISA = qw' HTML::DOM::Element HTML::DOM::_TreeBuilder';
186              
187 24     24   2342 use Scalar::Util qw 'weaken isweak';
  24         26  
  24         31299  
188              
189             # I have to override this so it doesn't delete _HTML_DOM_* attri-
190             # butes and so that it doesn’t rebless the object.
191             sub elementify {
192 93     93   87 my $self = shift;
193 93 100       2500 my %attrs = map /^[a-z_]*\z/ ? () : ($_ => $self->{$_}),
194             keys %$self;
195 93         1263 my @weak = grep isweak $self->{$_}, keys %$self;
196 93         365 $self->SUPER::elementify;
197 93         585 %$self = (%$self, %attrs); # this invigorates feeble refs
198 93         500 weaken $self->{$_} for @weak;
199             }
200              
201             sub new {
202 147     147   133 my $tb; # hafta declare it separately so the closures can
203             # c it
204             ($tb = shift->HTML::DOM::_TreeBuilder::new(
205             element_class => 'HTML::DOM::Element',
206             'tweak_~text' => sub {
207 547     547   569 my ($text, $parent) = @_;
208             # $parent->ownerDocument will be undef if
209             # $parent is the doc.
210 547   33     1140 $parent->splice_content( -1,1,
211             ($parent->ownerDocument || $parent)
212             ->createTextNode($text) );
213             $parent->content_offset(
214             $$tb{_HTML_DOM_tb_c_offset}
215 547         1390 );
216             },
217             'tweak_*' => sub {
218 807     807   814 my($elem, $tag, $doc_elem) = @_;
219 807 100       1218 $tag =~ /^~/ and return;
220              
221 800 100       1095 if(
222             $tag eq 'link'
223             ) {
224 16         31 HTML'DOM'Element'Link'_reset_style_sheet(
225             $elem
226             );
227             }
228              
229             # If a form is being closed, determine
230             # whether it is closed implicitly and set
231             # the current form and magic form
232             # accordingly.
233 800 100       1022 if($tag eq 'form') {
234             pop
235 40 50       39 @{$$doc_elem{_HTML_DOM_cf}||[]};
  40         85  
236             delete $$doc_elem{_HTML_DOM_etif}
237             or $$doc_elem{_HTML_DOM_mg_f}
238 40 100       106 = $elem
239             }
240              
241             # If a formie is being closed, create a
242             # magic association where appropriate.
243 800 100 100     2749 if(!$$doc_elem{_HTML_DOM_no_mg}
      66        
      66        
      100        
244             and $tag =~ /^(?:
245             button|(?:
246             fieldse|inpu|(?:obj|sel)ec
247             )t|label|textarea
248             )\z/x
249             and $$doc_elem{_HTML_DOM_mg_f}
250             and !$$doc_elem{_HTML_DOM_cf}
251             ||!@{$$doc_elem{_HTML_DOM_cf}}) {
252             $elem->form(
253             $$doc_elem{_HTML_DOM_mg_f}
254 9         27 );
255 9         26 $doc_elem->ownerDocument->
256             magic_forms(1);
257             }
258              
259             my $event_offsets = delete
260             $elem->{_HTML_DOM_tb_event_offsets}
261 800 100       1855 or return;
262 4         7 _create_events(
263             $doc_elem, $elem, $event_offsets
264             );
265             },
266 147         1041 ))
267             ->ignore_ignorable_whitespace(0); # stop eof()'s cleanup
268 147         354 $tb->store_comments(1); # from changing an
269 147         335 $tb->unbroken_text(1); # necessary, con- # elem_han-
270             # sidering what # dler's view
271             # _tweak_~text does # of the tree
272              
273             # Web browsers preserve whitespace, at least from the point
274             # of view of the DOM; but the main reason we are using this
275             # option is that a parser for innerHTML doesn’t know
276             # whether the nodes will be inserted in a
. 
277 147         305 no_space_compacting $tb 1;
278              
279 147         667 $tb->handler(text => "text", # so we can get line
280             "self, text, is_cdata, offset"); # numbers for scripts
281 147         472 $tb->handler(start => "start",
282             "self, tagname, attr, attrseq, offset, tokenpos");
283 147         442 $tb->handler((declaration=>)x2,'self,tagname,tokens,text');
284              
285 147         445 $tb->{_HTML_DOM_tweakall} = $tb->{'_tweak_*'};
286              
287 147         199 my %opts = @_;
288 147         178 $tb->{_HTML_DOM_no_mg} = delete $opts{no_magic_forms};
289             # used by an element’s innerHTML
290              
291             # We have to copy it like this, because our circular ref-
292             # erence is thus: $tb -> object -> closure -> $tb
293             # We can’t weaken $tb without a copy of it, because it is
294             # the only reference to the object.
295 147         130 my $life_raft = $tb; weaken $tb; $tb;
  147         202  
  147         344  
296             }
297              
298             sub start {
299 594 100   594   1175 return shift->SUPER::start(@_) if @_ < 6; # shirt-çorcuit
300            
301 589         490 my $tokenpos = pop;
302 589         428 my $offset = pop;
303 589         467 my %event_offsets;
304 589         423 my $attr_names = pop;
305 589         1213 for(0..$#$attr_names) {
306             $$attr_names[$_] =~ /^on(.*)/is
307 535 100       1333 and $event_offsets{$1} =
308             $$tokenpos[$_*4 + 4] + $offset;
309             }
310              
311 589         1453 my $elem = (my $self = shift)->SUPER::start(@_);
312            
313 589 100 100     936 $_[0] eq 'form' and push @{ $$self{_HTML_DOM_cf} ||= [] },
  40         158  
314             $elem;
315              
316 589 100       4212 return $elem unless %event_offsets;
317              
318 5 100       17 if(!$HTML::Tagset::emptyElement{$_[0]}) { # container
319             $$elem{_HTML_DOM_tb_event_offsets} =
320 4         7 \%event_offsets;
321             } else {
322 1         3 _create_events(
323             $self,
324             $elem,
325             \%event_offsets,
326             );
327             }
328              
329 5         31 return $elem;
330             }
331              
332             sub _create_events {
333 5     5   7 my ($doc_elem,$elem,$event_offsets) = @_;
334 5 100       14 defined(my $event_attr_handler =
335             $doc_elem->ownerDocument->event_attr_handler)
336             or return;
337 3         7 for(keys %$event_offsets) {
338             my $l =
339             &$event_attr_handler(
340             $elem,
341             $_,
342             $elem->attr("on$_"),
343 3         19 $$event_offsets{$_}
344             );
345 3 50       280 defined $l and
346             $elem->event_handler (
347             $_, $l
348             );
349             }
350             }
351              
352             sub text {
353 547     547   702 $_[0]{_HTML_DOM_tb_c_offset} = pop;
354 547         1128 shift->SUPER::text(@_)
355             }
356              
357             sub insert_element {
358 860     860   1071 my ($self, $tag) = (shift, @_);
359 860 100 100     2482 if((ref $tag ? $tag->tag : $tag) eq 'tr'
    100          
360             and $self->pos->tag eq 'table') {
361 12         23 $self->insert_element('tbody', 1);
362             }
363 860         1772 $self->SUPER::insert_element(@_);
364             }
365              
366             sub end {
367 601     601   533 my $self = shift;
368              
369             # If this is a form, record that we’ve seen an end tag, so
370             # that this does not become a ‘magic form’.
371             ++$$self{_HTML_DOM_etif} # end tag is 'form'
372 601 100       1009 if $_[0] eq 'form';
373              
374             # Make sure cannot close a cell outside the cur-
375             # rent table.
376 601 100       1066 $_[0] =~ /^t[hd]\z/ and @_ = (\$_[0], 'table');
377              
378             # HTML::TreeBuilder expects the element to be the
379             # topmost element, and gets confused when it’s inside the
380             # ~doc. It sets _pos to the doc when it encounters .
381             # This works around that.
382 601         564 my $pos = $self->{_pos};
383 601         1327 my @ret = $self->SUPER::end(@_);
384             $self->{_pos} = $pos
385 600 100 100     1497 if ($self->{_pos}||return @ret)->{_tag} eq '~doc';
386 561         1697 @ret; # TB relies on this retval
387             }
388              
389             sub declaration {
390 9     9   17 my($self,$tagname,$tokens,$source) = @_;
391             return
392 9 100 66     50 unless $tagname eq 'doctype'
393             and my $parent = $self->parent;
394             package HTML::DOM; # bypass overloading
395             $parent->{_HTML_DOM_doctype} = $source
396 8 50       11 unless defined $parent->{_HTML_DOM_doctype};
397 8 100       29 return unless @$tokens > 3;
398 7         16 for ($self->{_HTML_DOM_version} = $tokens->[3]){
399 7 50       81 s/^['"]// and s/['"]\z//;
400             }
401             }
402              
403 1147     1147   2939 sub element_class { 'HTML::DOM::Element' }
404              
405             # HTMLHtmlElement interface
406 5     5   362 sub version { shift->_attr('version' => @_) }
407              
408             } # end of special TreeBuilder package
409              
410             sub new {
411 92     92 1 2033822 my $self = shift->SUPER::new('~doc');
412              
413 92         153 my %opts = @_;
414 92         1358 $self->{_HTML_DOM_url} = $opts{url}; # might be undef
415 92         272 $self->{_HTML_DOM_referrer} = $opts{referrer}; # might be undef
416 92 100       255 if($opts{response}) {
417 9         12 $self->{_HTML_DOM_response} = $opts{response};
418 9 100       13 if(!defined $self->{_HTML_DOM_url}) {{
419 7         6 $self->{_HTML_DOM_url} =
420             ($opts{response}->request || last)
421 7   100     18 ->url;
422             }}
423 9 100       34 if(!defined $self->{_HTML_DOM_referrer}) {{
424 7         6 $self->{_HTML_DOM_referrer} =
425             ($opts{response}->request || last)
426 7   100     14 ->header('Referer')
427             }}
428 9 100       42 if($opts{weaken_response}) {
429             weaken $self->{_HTML_DOM_response}
430 1         2 }
431             }
432 92         116 $self->{_HTML_DOM_jar} = $opts{cookie_jar}; # might be undef
433 92         113 $self->{_HTML_DOM_cs} = $opts{charset};
434              
435 92         321 $self;
436             }
437              
438             =item $tree->elem_handler($elem_name => sub { ... })
439              
440             If you call this method first, then, when the DOM tree is in the
441             process of
442             being built (as a result of a call to C or C), the
443             subroutine will be called after each C<$elem_name> element
444             is
445             added to the tree. If you give '*' as the element name, the subroutine
446             will be called for each element that does not have a handler. The
447             subroutine's
448             two arguments will be the tree itself
449             and the element in question. The subroutine can call the DOM object's
450             C
451             method to insert HTML code into the source after the element.
452              
453             Here is a lame example (which does not take Content-Script-Type headers
454             or security into account):
455              
456             $tree->elem_handler(script => sub {
457             my($document,$elem) = @_;
458             return unless $elem->attr('type') eq 'application/x-perl';
459             eval($elem->firstChild->data);
460             });
461              
462             $tree->write(
463             '

The time is

464            
467             precisely.
468            

'
469             );
470             $tree->close;
471              
472             print $tree->documentElement->as_text, "\n";
473              
474             (Note: L's
475             L|HTML::DOM::Element/content_offset> method might come in
476             handy for reporting line numbers for script errors.)
477              
478             =cut
479              
480             sub elem_handler {
481 133     133 1 747 my ($self,$elem_name,$sub) = @_;
482              
483             # ~~~ temporary; for internal use only:
484 133 100       303 @_ < 3 and return $$self{_HTML_DOM_nih}{$elem_name};
485              
486 9         14 $$self{_HTML_DOM_nih}{$elem_name} = $sub; # nih = node inser-
487             # tion handler
488             my $h = $self->{_HTML_DOM_elem_handlers}{$elem_name} = sub {
489             # I can’t put $doc_elem outside the closure, because
490             # ->open replaces it with another object, and we’d be
491             # referring to the wrong one.
492 16     16   15 my $doc_elem = $_[2];
493 16         29 $doc_elem->{_HTML_DOM_tweakall}->(@_);
494 16         29 $self->_modified; # in case there are node lists hanging
495             # around that the handler references
496 16         37 &$sub($self, $_[0]);
497              
498             # See the comment in sub write.
499 15         310 (my $level = $$self{_HTML_DOM_buffered});
500 15 50 66     72 if( $level
      66        
      66        
501             and ($level -= 1, 1)
502             and $$self{_HTML_DOM_p}
503             and $$self{_HTML_DOM_p}[$level]
504             ) {
505 7         7 $$self{_HTML_DOM_p}[$level]->eof;
506             $level
507 1         1 ? --$#{$$self{_HTML_DOM_p}}
508 7 100       19 : delete $$self{_HTML_DOM_p};
509             }
510 9         32 };
511 9 100       12 if(my $p = $$self{_HTML_DOM_parser}) {
512 1         2 $$p{"_tweak_$elem_name"} = $h
513             }
514 9         24 weaken $self;
515 9         13 return;
516             }
517              
518              
519             =item css_url_fetcher( \&sub )
520              
521             With this method you can provide a subroutine that fetches URLs referenced
522             by 'link' tags. Its sole argument is the URL, which is made absolute based
523             on the HTML page's own base URL (it is assumed that this is absolute). It
524             should return C or an empty list on failure. Upon
525             success, it should return just the CSS code, if it has been decoded (and is
526             in Unicode), or, if it has not been decoded, the CSS code followed by
527             C<< decode => 1 >>. See L for details on
528             when you should or should not decode it. (Note that HTML::DOM
529             automatically
530             provides an encoding hint based on the HTML document.)
531              
532             HTML::DOM passes the result of the url fetcher to L and
533             turns
534             it into a style sheet object accessible via the link element's
535             L|HTML::DOM::Element::Link/sheet> method.
536              
537             =cut
538              
539             sub css_url_fetcher {
540 11     11 1 598 my $old = (my $self = shift)->{_HTML_DOM_cuf};
541 11 100       21 $self->{_HTML_DOM_cuf} = shift if @_;
542 11 100       41 $old||();
543             }
544              
545             =item $tree->write(...) (DOM method)
546              
547             This parses the HTML code passed to it, adding it to the end of
548             the
549             document. It assumes that its input is a normal Perl Unicode string. Like
550             L's
551             C method, it can take a coderef.
552              
553             When it is called from an an element handler (see
554             C, above), the value passed to it
555             will be inserted into the HTML code after the current element when the
556             element handler returns. (In this case a coderef won't do--maybe that will
557             be added later.)
558              
559             If the C method has been called, C will call C before
560             parsing the HTML code passed to it.
561              
562             =item $tree->writeln(...) (DOM method)
563              
564             Just like C except that it appends "\n" to its argument and does
565             not work with code refs. (Rather
566             pointless, if you ask me. :-)
567              
568             =item $tree->close() (DOM method)
569              
570             Call this method to signal to the parser that the end of the HTML code has
571             been reached. It will then parse any residual HTML that happens to be
572             buffered. It also makes the next C call C.
573              
574             =item $tree->open (DOM method)
575              
576             Deletes the HTML tree, resetting it so that it has just an element,
577             and a parser hungry for HTML code.
578              
579             =item $tree->parse_file($file)
580              
581             This method takes a file name or handle and parses the content,
582             (effectively) calling C afterwards. In the former case (a file
583             name), L will be used to detect the encoding. In the
584             latter (a file handle), you'll have to C it yourself. This could
585             be considered a bug. If you have a solution to this (how to make
586             HTML::Encoding detect an encoding from a file handle), please let me know.
587              
588             As of version 0.12, this method returns true upon success, or undef/empty
589             list on failure.
590              
591             =item $tree->charset
592              
593             This method returns the name of the character
594             set that was passed to C, or, if that was not given, that which
595             C used.
596              
597             It returns undef if C was not given a charset and if C was
598             not
599             used or was
600             passed a file handle.
601              
602             You can also set the charset by passing an argument, in which case the old
603             value is returned.
604              
605              
606             =cut
607              
608             sub parse_file {
609 5     5 1 9 my $file = $_[1];
610              
611 5         11 $_[0]->open;
612              
613             # This ‘if’ statement uses the same check that HTML::Parser uses.
614             # We are not strictly checking to see whether it’s a handle,
615             # but whether HTML::Parser would consider it one.
616 5 50 33     32 if (ref($file) || ref(\$file) eq "GLOB") {
617             (my $a = shift->{_HTML_DOM_parser})
618 0 0       0 ->parse_file($file) || return;
619 0         0 $a ->elementify;
620 0         0 return 1;
621             }
622              
623 24     24   116 no warnings 'parenthesis'; # 5.8.3 Grrr!!
  24         32  
  24         13037  
624 5 100       8 if(my $charset = $_[0]{_HTML_DOM_cs}) {
625 3 50       72 open my $fh, $file or return;
626 3         15 $charset =~ s/^(?:x-?)?mac-?/mac/i;
627 3     1   47 binmode $fh, ":encoding($charset)";
  1         4  
  1         1  
  1         4  
628             $$_{_HTML_DOM_parser}->parse_file($fh) || return,
629             $_->close
630 3   50     2974 for shift;
631 3         73 return 1;
632             }
633              
634 2 100       80 open my $fh, $file or return;
635 1         3 local $/;
636 1         13 my $contents = <$fh>;
637 1         457 require HTML::Encoding;
638 1   50     9434 my $encoding = HTML::Encoding::encoding_from_html_document(
639             $contents
640             ) || 'iso-8859-1';
641             # Since we’ve already slurped the file, we might as well
642             # avoid having HTML::Parser read it again, even if we could
643             # use binmode.
644 1         4093 require Encode;
645             $_->write(Encode::decode($encoding, $contents)), $_->close,
646             $_->{_HTML_DOM_cs} = $encoding
647 1         5 for shift;
648 1         35 return 1;
649             }
650              
651             sub charset {
652 38     38 1 998 my $old = (my$ self = shift)->{_HTML_DOM_cs};
653 38 100       82 $self->{_HTML_DOM_cs} = shift if @_;
654 38         122 $old;
655             }
656              
657             sub write {
658 110     110 1 11014 my $self = shift;
659 110 100       151 if($$self{_HTML_DOM_buffered}) {
660             # Although we call this buffered, it’s actually not. Before
661             # version 0.040, a recursive call to ->write on the same
662             # doc object would simply record the HTML code in a buffer
663             # that was processed when the elem handler that made the
664             # inner call to ->write finished. Every elem handler would
665             # have a wrapper (created in the elem_handler sub above)
666             # that took care of this after calling the handler, by cre-
667             # ating a new, temporary, parser object that would call the
668             # start/end, etc., methods of our tree builder.
669             #
670             # This approach stops JS code like this from working (yes,
671             # there *are* websites with code like this!):
672             # document.write("")
673             # document.getElementById("img1").src="..."
674             #
675             # So, now we take care of creating a new parser immedi-
676             # ately. This does mean, however that we end up with mul-
677             # tiple parser objects floating around in the case of
678             # nested . So we have to be careful to create and
679             # delete them at the right time.
680              
681             # $$self{_HTML_DOM_buffered} actually contains a number
682             # indicating the number of nested calls to ->write.
683 7         8 my $level = $$self{_HTML_DOM_buffered};
684 7         11 local $$self{_HTML_DOM_buffered} = $level + 1;
685              
686 7         10 my($doc_elem) = $$self{_HTML_DOM_parser};
687              
688             # These handlers delegate the handling to methods of
689             # *another* HTML::Parser object.
690             my $p = $$self{_HTML_DOM_p}[$level-1] ||=
691             HTML::Parser->new(
692             start_h => [
693 5     5   10 sub { $doc_elem->start(@_) },
694             'tagname, attr, attrseq'
695             ],
696             end_h => [
697 1     1   3 sub { $doc_elem->end(@_) },
698             'tagname, text'
699             ],
700             text_h => [
701 7     7   9 sub { $doc_elem->text(@_) },
702 7   33     9 'text, is_cdata'
703             ],
704             );
705              
706 7         243 $p->unbroken_text(1); # push_content, which is called by
707             # H:TB:text, won't concatenate two
708             # text portions if the first one
709             # is a node.
710              
711 7         27 $p->parse(shift);
712              
713             # We can’t get rid of our parser at this point, as a subse-
714             # quent ->write call from the same nested level (e.g., from
715             # the same ), then we need to remove it, so we have
719             # elem_handler do that for us.
720             }
721             else {
722             my $parser
723             = $$self{_HTML_DOM_parser}
724 103   66     117 || ($self->open, $$self{_HTML_DOM_parser});
725 103         142 local $$self{_HTML_DOM_buffered} = 1;
726 103         1055 $parser->parse($_) for @_;
727             }
728 110         237 $self->_modified;
729             return # nothing;
730 110         209 }
731              
732 4     4 1 7 sub writeln { shift->write(@_,"\n") }
733              
734             sub close {
735 97     97 1 587 my $a = (my $self = shift)->{_HTML_DOM_parser};
736 97 100       200 return unless $a;
737              
738             # We can’t use eval { $a->eof } because that would catch errors
739             # that are meant to propagate (a nasty bug [the so-called
740             # ‘content—offset’ bug] was hidden because of an eval in ver-
741             # sion 0.010).
742             # return unless $a->can('eof');
743            
744 94         323 $a->eof(@_);
745 93         109 delete $$self{_HTML_DOM_parser};
746 93         186 $a->elementify;
747             return # nothing;
748 93         176 }
749              
750             sub open {
751 116     116 1 1988 (my $self = shift)->detach_content;
752              
753             # We have to use push_content instead of simply putting it there
754             # ourselves, because push_content takes care of weakening the
755             # parent (and that code doesn’t belong in this package).
756             $self->push_content(
757 116         1265 my $tb = $$self{_HTML_DOM_parser} = new HTML::DOM::Element::HTML
758             );
759              
760 116         2295 delete @$self{<_HTML_DOM_sheets _HTML_DOM_doctype>};
761              
762 116 100       269 return unless $self->{_HTML_DOM_elem_handlers};
763 14         18 for(keys %{$self->{_HTML_DOM_elem_handlers}}) {
  14         15  
764             $$tb{"_tweak_$_"} =
765 14         19 $self->{_HTML_DOM_elem_handlers}{$_}
766             }
767              
768             return # nothing;
769 14         25 }
770              
771             =back
772              
773             =head2 Other DOM Methods
774              
775             =over 4
776              
777             =cut
778              
779              
780             #-------------- DOM STUFF (CORE) ---------------- #
781              
782             =item doctype
783              
784             Returns nothing
785              
786             =item implementation
787              
788             Returns the L object.
789              
790             =item documentElement
791              
792             Returns the element.
793              
794             =item createElement ( $tag )
795              
796             =item createDocumentFragment
797              
798             =item createTextNode ( $text )
799              
800             =item createComment ( $text )
801              
802             =item createAttribute ( $name )
803              
804             Each of these creates a node of the appropriate type.
805              
806             =item createProcessingInstruction
807              
808             =item createEntityReference
809              
810             These two throw an exception.
811              
812             =for comment
813             =item createCSSStyleSheet
814             This creates a style sheet (L object).
815              
816             =item getElementsByTagName ( $name )
817              
818             C<$name> can be the name of the tag, or '*', to match all tag names. This
819             returns a node list object in scalar context, or a list in list context.
820              
821             =item importNode ( $node, $deep )
822              
823             Clones the C<$node>, setting its C attribute to the document
824             with which this method is called. If C<$deep> is true, the C<$node> will
825             be
826             cloned recursively.
827              
828             =cut
829              
830       2 1   sub doctype {} # always null
831              
832             sub implementation {
833 24     24   101 no warnings 'once';
  24         28  
  24         14675  
834 2     2 1 4 return $HTML::DOM::Implementation::it;
835             }
836              
837             sub documentElement {
838 103     103 1 517 ($_[0]->content_list)[0]
839             }
840              
841             sub createElement {
842 354     354 1 59433 my $elem = HTML::DOM::Element->new($_[1]);
843 354         1237 $elem->_set_ownerDocument(shift);
844 354         884 $elem;
845             }
846              
847             sub createDocumentFragment {
848 9     9 1 1236 my $thing = HTML::DOM::DocumentFragment->new;
849 9         25 $thing->_set_ownerDocument(shift);
850 9         16 $thing;
851             }
852              
853             sub createTextNode {
854 606     606 1 2543 my $thing = HTML::DOM::Text->new(@_[1..$#_]);
855 606         1220 $thing->_set_ownerDocument(shift);
856 606         1433 $thing;
857             }
858              
859             sub createComment {
860 7     7 1 309 my $thing = HTML::DOM::Comment->new(@_[1..$#_]);
861 7         24 $thing->_set_ownerDocument(shift);
862 7         21 $thing;
863             }
864              
865             sub createCDATASection {
866 1     1 0 261 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
867             'The HTML DOM does not support CDATA sections' );
868             }
869              
870             sub createProcessingInstruction {
871 1     1 1 189 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
872             'The HTML DOM does not support processing instructions' );
873             }
874              
875             sub createAttribute {
876 22     22 1 1697 my $thing = HTML::DOM::Attr->new(@_[1..$#_]);
877 22         62 $thing->_set_ownerDocument(shift);
878 22         49 $thing;
879             }
880              
881             sub createEntityReference {
882 1     1 1 248 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
883             'The HTML DOM does not support entity references' );
884             }
885              
886             #sub createCSSStyleSheet {
887             # shift;
888             # require CSS'DOM;
889             # ~~~
890             #}
891              
892             sub getElementsByTagName {
893 17     17 1 374 my($self,$tagname) = @_;
894             #warn "You didn't give me a tag name." if !defined $tagname;
895 17 100       38 if (wantarray) {
896 4 100       24 return $tagname eq '*'
897             ? grep tag $_ !~ /^~/, $self->descendants
898             : $self->find($tagname);
899             }
900             else {
901             my $list = HTML::DOM::NodeList::Magic->new(
902             $tagname eq '*'
903 2     2   6 ? sub { grep tag $_ !~ /^~/, $self->descendants }
904 17     17   60 : sub { $self->find($tagname) }
905 13 100       123 );
906 13         33 $self-> _register_magic_node_list($list);
907 13         122 $list;
908             }
909             }
910              
911             sub importNode {
912 12     12 1 214 my ($self, $node, $deep) = @_;
913 12 100       39 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
914             'Documents cannot be imported.' )
915             if $node->nodeType ==DOCUMENT_NODE;
916 10         34 (my $clown = $node->cloneNode($deep))
917             ->_set_ownerDocument($self);
918 10 100       46 if($clown->can('descendants')) { # otherwise it’s an Attr, so this
919 8         16 for($clown->descendants) { # isn’t necessary
920 4         4 delete $_->{_HTML_DOM_owner};
921             }}
922 10         18 $clown;
923             }
924              
925             #-------------- DOM STUFF (HTML) ---------------- #
926              
927             =item alinkColor
928              
929             =item background
930              
931             =item bgColor
932              
933             =item fgColor
934              
935             =item linkColor
936              
937             =item vlinkColor
938              
939             These six methods return (optionally set) the corresponding attributes of
940             the body element. Note that most of the names do not map directly to the
941             names of
942             the attributes. C refers to the C attribute. Those that
943             end
944             with 'linkColor' refer to the attributes of the same name but without the
945             'Color' on the end.
946              
947             =cut
948              
949 6   100 6 1 10 sub alinkColor { (shift->body||return "")->aLink (@_) }
950 6   100 6 1 731 sub background { (shift->body||return "")->background(@_) }
951 6   100 6 1 712 sub bgColor { (shift->body||return "")->bgColor (@_) }
952 6   100 6 1 766 sub fgColor { (shift->body||return "")->text (@_) }
953 6   100 6 1 772 sub linkColor { (shift->body||return "")->link (@_) }
954 6   100 6 1 766 sub vlinkColor { (shift->body||return "")->vLink (@_) }
955              
956             =item title
957              
958             Returns (or optionally sets) the title of the page.
959              
960             =item referrer
961              
962             Returns the page's referrer.
963              
964             =item domain
965              
966             Returns the domain name portion of the document's URL.
967              
968             =item URL
969              
970             Returns the document's URL.
971              
972             =item body
973              
974             Returns the body element, or the outermost frame set if the document has
975             frames. You can set the body by passing an element as an argument, in
976             which
977             case the old body element is returned.
978              
979             =item images
980              
981             =item applets
982              
983             =item links
984              
985             =item forms
986              
987             =item anchors
988              
989             These five methods each return a list of the appropriate elements in list
990             context, or an L object in scalar context. In this
991             latter case, the object will update automatically when the document is
992             modified.
993              
994             In the case of C you can access those by using the HTML::DOM object
995             itself as a hash. I.e., you can write C<< $doc->{f} >> instead of
996             S<< C<< $doc->forms->{f} >> >>.
997              
998             =for comment
999             # ~~~ Why on earth did I ever put this in the docs?!
1000             B I need to make these methods cache the HTML collection objects
1001             that they create. Once I've done this, I can make list context use those
1002             objects, as well as scalar context.
1003              
1004             =item cookie
1005              
1006             This returns a string containing the document's cookies (the format may
1007             still change). If you pass an
1008             argument, it
1009             will set a cookie as well. Both Netscape-style and RFC2965-style cookie
1010             headers are supported.
1011              
1012             =cut
1013              
1014             sub title {
1015 12     12 1 290 my $doc = shift;
1016 12 100       28 if(my $title_elem = $doc->find('title')) {
1017 8         19 $title_elem->text(@_);
1018             }
1019             else {
1020 4 100       10 return "" unless @_;
1021 3   66     6 ( $doc->find('head')
1022             || ( $doc->find('html')
1023             || $doc->appendChild($doc->createElement('html'))
1024             )->appendChild($doc->createElement('head'))
1025             )->appendChild(
1026             my $t = $doc->createElement('title')
1027             );
1028 3         9 $t->text(@_);
1029 3         9 return "";
1030             }
1031             }
1032              
1033             sub referrer {
1034 5     5 1 740 my $referrer = shift->{_HTML_DOM_referrer};
1035 5 50       19 defined $referrer ? $referrer : ();
1036             }
1037              
1038 24     24   103 sub domain { no strict;
  24         26  
  24         14253  
1039 2     2 1 4 my $doc = shift;
1040 2         1 host {ref $doc->{_HTML_DOM_url} ? $doc->{_HTML_DOM_url}
1041 2 100       4 : ($doc->{_HTML_DOM_url} = URI->new($doc->{_HTML_DOM_url}))};
1042             }
1043              
1044             sub URL {
1045 151     151 1 1281 my $url = shift->{_HTML_DOM_url};
1046 151 100       529 defined $url ? "$url" : undef;
1047             }
1048              
1049             sub body { # ~~~ this needs to return the outermost frameset element if
1050             # there is one (if the frameset is always the second child
1051             # of , then it already does).
1052 78     78 1 1505 my $body = ($_[0]->documentElement->content_list)[1];
1053 78 100 66     276 if (!$body || $body->tag !~ /^(?:body|frameset)\z/) {
1054 24         40 $body = $_[0]->find('body','frameset');
1055             }
1056 78 100       128 if(@_>1) {
1057 2         3 my $doc_elem = $_[0]->documentElement;
1058             # I'm using the replaceChild rather than replace_with,
1059             # despite the former's convoluted syntax, since the former
1060             # has the appropriate error-checking code (or will), and
1061             # also because it triggers mutation events.
1062 2         8 $doc_elem->replaceChild($_[1],$body)
1063             }
1064             else {
1065 76         316 $body
1066             }
1067             }
1068              
1069             sub images {
1070 2     2 1 6 my $self = shift;
1071 2 100       5 if (wantarray) {
1072 1         4 return grep tag $_ eq 'img', $self->descendants;
1073             }
1074             else {
1075             my $collection = HTML::DOM::Collection->new(
1076             my $list = HTML::DOM::NodeList::Magic->new(
1077 1     1   6 sub { grep tag $_ eq 'img', $self->descendants }
1078 1         9 ));
1079 1         3 $self-> _register_magic_node_list($list);
1080 1         20 $collection;
1081             }
1082             }
1083              
1084             sub applets {
1085 2     2 1 3 my $self = shift;
1086 2 100       5 if (wantarray) {
1087 1         3 return grep $_->tag =~ /^(?:objec|apple)t\z/,
1088             $self->descendants;
1089             }
1090             else {
1091             my $collection = HTML::DOM::Collection->new(
1092             my $list = HTML::DOM::NodeList::Magic->new(
1093 1     1   3 sub { grep $_->tag =~ /^(?:objec|apple)t\z/,
1094             $self->descendants }
1095 1         10 ));
1096 1         2 $self-> _register_magic_node_list($list);
1097 1         3 $collection;
1098             }
1099             }
1100              
1101             sub links {
1102 4     4 1 351 my $self = shift;
1103 4 100       10 if (wantarray) {
1104             return grep {
1105 1         3 my $tag = tag $_;
  38         50  
1106 38 100 100     98 $tag eq 'area' || $tag eq 'a'
1107             && defined $_->attr('href')
1108             } $self->descendants;
1109             }
1110             else {
1111             my $collection = HTML::DOM::Collection->new(
1112             my $list = HTML::DOM::NodeList::Magic->new(
1113             sub { grep {
1114 3     3   17 my $tag = tag $_;
  90         139  
1115 90 100 100     228 $tag eq 'area' || $tag eq 'a'
1116             && defined $_->attr('href')
1117             } $self->descendants }
1118 3         24 ));
1119 3         8 $self-> _register_magic_node_list($list);
1120 3         44 $collection;
1121             }
1122             }
1123              
1124             sub forms {
1125 39     39 1 484 my $self = shift;
1126 39 100       66 if (wantarray) {
1127 17         46 return grep tag $_ eq 'form', $self->descendants;
1128             }
1129             else {
1130             my $collection = HTML::DOM::Collection->new(
1131             my $list = HTML::DOM::NodeList::Magic->new(
1132 22     22   71 sub { grep tag $_ eq 'form', $self->descendants }
1133 22         151 ));
1134 22         45 $self-> _register_magic_node_list($list);
1135 22         203 $collection;
1136             }
1137             }
1138              
1139             sub anchors {
1140 2     2 1 2 my $self = shift;
1141 2 100       5 if (wantarray) {
1142 1   100     3 return grep tag $_ eq 'a' && defined $_->attr('name'),
1143             $self->descendants;
1144             }
1145             else {
1146             my $collection = HTML::DOM::Collection->new(
1147             my $list = HTML::DOM::NodeList::Magic->new(
1148 1   100 1   3 sub { grep tag $_ eq 'a' && defined $_->attr('name'),
1149             $self->descendants }
1150 1         34 ));
1151 1         2 $self-> _register_magic_node_list($list);
1152 1         3 $collection;
1153             }
1154             }
1155              
1156              
1157             sub cookie {
1158 8     8 1 445 my $self = shift;
1159 8 100       12 return '' unless defined (my $jar = $self->{_HTML_DOM_jar});
1160 6         7 my $return;
1161 6 50       11 if (defined wantarray) {
1162             # Yes, this is nuts (getting HTTP::Cookies to join the cookies, and
1163             # splitting them, filtering them, and joining them again[!]), but
1164             # &HTTP::Cookies::add_cookie_header is long and complicated, and I
1165             # don't want to replicate it here.
1166 24     24   100 no warnings 'uninitialized';
  24         25  
  24         7547  
1167 6         7 my $reqclone = $self->{_HTML_DOM_response}->request->clone;
1168             # Yes this is a bit strange, but we don’t want to put
1169             # ‘use HTTP::Header 1.59’ in this file, as it would mean loading the
1170             # module even for people who are not using this feature or who are
1171             # duck-typing.
1172 6 50 66     587 if (!$reqclone->can('header_field_names')
1173 0         0 && $reqclone->isa("HTTP::Headers")) { VERSION HTTP::Headers:: 1.59 }
1174 6         13 for($reqclone->header_field_names) {
1175 2 50       80 /cookie/i and remove_header $reqclone $_;
1176             }
1177 6         91 $return = join ';', grep !/\$/,
1178             $jar->add_cookie_header(
1179             $reqclone
1180             )-> header ('Cookie')
1181             # Pieces of this regexp were stolen from HTTP::Headers::Util:
1182             =~ /\G\s* # initial whitespace
1183             (
1184             [^\s=;,]+ # name
1185             \s*=\s* # =
1186             (?:
1187             \"(?:[^\"\\]*(?:\\.[^\"\\]*)*)\" # quoted value
1188             |
1189             [^;,\s]* # unquoted value
1190             )
1191             )
1192             \s*;?
1193             /xg;
1194             }
1195 6 100       1409 if (@_) {
1196 3 50       7 return unless defined $self->{_HTML_DOM_response};
1197 3         13 require HTTP::Headers::Util;
1198             (undef,undef, my%split) =
1199 3         3 @{(HTTP::Headers::Util::split_header_words($_[0]))[0]};
  3         7  
1200 3         224 my $rfc;
1201 3         6 for(keys %split){
1202             # I *hope* this always works! (NS cookies should have no version.)
1203 6 100       14 ++ $rfc, last if lc $_ eq 'version';
1204             }
1205             (my $clone = $self->{_HTML_DOM_response}->clone)
1206 3         6 ->remove_header(qw/ Set-Cookie Set-Cookie2 /);
1207 3         556 $clone->header('Set-Cookie' . 2 x!! $rfc => $_[0]);
1208 3         133 $jar->extract_cookies($clone);
1209             }
1210 6 100       894 $return||'';
1211             }
1212              
1213             =item getElementById
1214              
1215             =item getElementsByName
1216              
1217             =item getElementsByClassName
1218              
1219             These three do what their names imply. The last two
1220             will return a list in list context, or a node list
1221             object in scalar context. Calling them in list
1222             context is probably more efficient.
1223              
1224             =cut
1225              
1226             sub getElementById {
1227 163     163 1 5682 my(@pile) = grep ref($_), @{shift->{'_content'}};
  163         239  
1228 163         177 my $id = shift;
1229 163         111 my $this;
1230 163         260 while(@pile) {
1231 24     24   104 no warnings 'uninitialized';
  24         29  
  24         10978  
1232 2501         1733 $this = shift @pile;
1233 2501 100       3269 $this->id eq $id and return $this;
1234 2339         3119 unshift @pile, grep ref($_), $this->content_list;
1235             }
1236 1         3 return;
1237             }
1238              
1239             sub getElementsByName {
1240 3     3 1 9 my($self,$name) = @_;
1241 3 100       7 if (wantarray) {
1242 2         25 return $self->look_down(name => "$name");
1243             }
1244             else {
1245             my $list = HTML::DOM::NodeList::Magic->new(
1246 1     1   6 sub { $self->look_down(name => "$name"); }
1247 1         5 );
1248 1         2 $self-> _register_magic_node_list($list);
1249 1         3 $list;
1250             }
1251             }
1252              
1253             sub getElementsByClassName {
1254 9     9 1 398 splice @_, 2, @_, 1; # Remove extra elements; add a true third elem
1255 9         25 goto &HTML'DOM'Element'_getElementsByClassName;
1256             }
1257              
1258             # ---------- DocumentEvent interface -------------- #
1259              
1260             =item createEvent ( $category )
1261              
1262             Creates a new event object, believe it or not.
1263              
1264             The C<$category> is the DOM event category, which determines what type of
1265             event object will be returned. The currently supported event categories
1266             are MouseEvents, UIEvents, HTMLEvents and MutationEvents.
1267              
1268             You can omit the C<$category> to create an instance of the event base class
1269             (not officially part of the DOM).
1270              
1271             =cut
1272              
1273             sub createEvent {
1274 267     267 1 2310 require HTML'DOM'Event;
1275 267   100     761 HTML'DOM'Event'create_event($_[1]||'');
1276             }
1277              
1278             # ---------- DocumentView interface -------------- #
1279              
1280             =item defaultView
1281              
1282             Returns the L object associated with the document.
1283              
1284             There is no such object by default; you have to put one there yourself:
1285              
1286             Although it is supposed to be read-only according to the DOM, you can set
1287             this attribute by passing an argument to it. It I still marked as
1288             read-only in
1289             L|HTML::DOM::Interface>.
1290              
1291             If you do set it, it is recommended that the object be a subclass of
1292             L.
1293              
1294             This attribute holds a weak reference to the object.
1295              
1296             =cut
1297              
1298             sub defaultView {
1299 1886     1886 1 1401 my $self = shift;
1300 1886         2126 my $old = $self->{_HTML_DOM_view};
1301 1886 100       3075 if(@_) {
1302 4         5 weaken($self->{_HTML_DOM_view} = shift);
1303             }
1304 1886 100       5551 return defined $old ? $old : ();
1305             }
1306              
1307             # ---------- DocumentStyle interface -------------- #
1308              
1309             =item styleSheets
1310              
1311             Returns a L of the document's style sheets, or a
1312             simple list in list context.
1313              
1314             =cut
1315              
1316             sub styleSheets {
1317 25     25 1 282 my $doc = shift;
1318             my $ret = (
1319             $doc->{_HTML_DOM_sheets} or
1320             $doc->{_HTML_DOM_sheets} = (
1321             require CSS::DOM::StyleSheetList,
1322             new CSS::DOM::StyleSheetList
1323             ),
1324             $doc->_populate_sheet_list,
1325             $doc->{_HTML_DOM_sheets}
1326 25   66     44 );
1327 25 100       88 wantarray ? @$ret : $ret;
1328             }
1329              
1330             =item innerHTML
1331              
1332             Serialises and returns the HTML document. If you pass an argument, it will
1333             set the contents of the document via C, C and C,
1334             returning a serialisation of the old contents.
1335              
1336             =cut
1337              
1338             sub innerHTML {
1339 25     25 1 5910 my $self = shift;
1340 25         22 my $old;
1341 25 50 100     67 $old = join '' , $self->{_HTML_DOM_doctype}||'',
    100          
1342             map
1343             HTML'DOM'Element'_html_element_adds_newline
1344             ? substr((
1345             as_HTML $_ (undef)x2,{}
1346             ), 0, -1)
1347             : $_->as_HTML((undef)x2,{}),
1348             $self->content_list
1349             if defined wantarray;
1350 25 100       59 if(@_){
1351 13         28 $self->open();
1352 13         34 $self->write(shift);
1353 13         27 $self->close();
1354             }
1355             $old
1356 25         86 }
1357              
1358              
1359             =item location
1360              
1361             =item set_location_object (non-DOM)
1362              
1363             C returns the location object, if you've put one there with
1364             C. HTML::DOM doesn't actually implement such an object
1365             itself, but provides the appropriate magic to make
1366             C<< $doc->location($foo) >> translate into
1367             C<< $doc->location->href($foo) >>.
1368              
1369             BTW, the location object had better be true when used as a boolean, or
1370             HTML::DOM will think it doesn't exist.
1371              
1372             =cut
1373              
1374             sub location {
1375 3     3 1 7 my $self = shift;
1376 3 100 50     8 @_ and ($$self{_HTML_DOM_loc}||die "Can't assign to location"
1377             ." without a location object")->href(@_);
1378             $$self{_HTML_DOM_loc}||()
1379 3 100       5 }
1380              
1381             sub set_location_object {
1382 1     1 1 3 $_[0]{_HTML_DOM_loc} = $_[1];
1383             }
1384              
1385              
1386             =item lastModified
1387              
1388             This method returns the document's modification date as gleaned from the
1389             response object passed to the constructor, in MM/DD/YYYY HH:MM:SS format.
1390              
1391             If there is no modification date, an empty string is returned, but this
1392             may change in the future.
1393              
1394             =begin comment
1395              
1396             When there is no modification date, the return value is different in every
1397             browser.
1398             NS 2-4 and Opera 9 have the epoch (in GMT format).
1399             Firefox 3 has the time the page was loaded.
1400             Safari 4 has an empty string (it uses GMT format when there is a mod time).
1401             IE, 6-8 the only one to comply with HTML 5, has the current time; but HTML
1402             5 is illogical, since it makes no sense for the modification time to keep
1403             ticking away.
1404              
1405             I’ve opted to use the empty string for now, since we can’t *really* find
1406             out the modification time--only what the server *says* it is. And if the
1407             server doesn’t say, it’s no use pretending that it did say it.
1408              
1409             =end comment
1410              
1411             =cut
1412              
1413             sub lastModified {
1414 4 100 100 4 1 517 my $time = ($_[0]{_HTML_DOM_response} || return '')->last_modified
1415             or return '';
1416 2         553 require Date'Format;
1417 2         3359 Date'Format'time2str("%d/%m/%Y %X", $time);
1418             }
1419              
1420              
1421             =back
1422              
1423             =cut
1424              
1425              
1426             # ---------- OVERRIDDEN NODE & EVENT TARGET METHODS -------------- #
1427              
1428       44 1   sub ownerDocument {} # empty list
1429 1     1 1 309 sub nodeName { '#document' }
1430 24     24   101 { no warnings 'once'; *nodeType = \& DOCUMENT_NODE; }
  24         24  
  24         1818  
1431              
1432             =head2 Other (Non-DOM) Methods
1433              
1434             (See also L, below.)
1435              
1436             =over 4
1437              
1438             =item $tree->base
1439              
1440             Returns the base URL of the page; either from a tag or the
1441             URL passed to C.
1442              
1443             =cut
1444              
1445             sub base {
1446 157     157 1 154 my $doc = shift;
1447 157 100       587 if(
1448             my $base_elem = $doc->look_down(_tag => 'base', href => qr)(?:\)))
1449             ){
1450 10         23 return ''.$base_elem->attr('href');
1451             }
1452             else {
1453 24     24   85 no warnings 'uninitialized';
  24         26  
  24         11319  
1454 147 100       114 ''.base{$$doc{_HTML_DOM_response}||return$doc->URL}
  147         165  
1455             }
1456             }
1457              
1458             =item $tree->magic_forms
1459              
1460             This is mainly for internal use. This returns a boolean indicating whether
1461             the parser needed to associate formies with a form that did not contain
1462             them. This happens when a closing tag is missing and the form is
1463             closed implicitly, but a formie is encountered later.
1464              
1465             =cut
1466              
1467 1580 50   1580 1 3295 sub magic_forms { @_ and ++$_[0]{_HTML_DOM_mg_f}; $_[0]{_HTML_DOM_mg_f} }
  1580         1951  
1468              
1469             =back
1470              
1471             =head1 HASH ACCESS
1472              
1473             You can use an HTML::DOM object as a hash ref to access it's form elements
1474             by name. So C<< $doc->{yayaya} >> is short for
1475             S<< C<< $doc->forms->{yayaya} >> >>.
1476              
1477             =head1 EVENT HANDLING
1478              
1479             HTML::DOM supports both the DOM Level 2 event model and the HTML 4 event
1480             model.
1481              
1482             Throughout this documentation, we make use of HTML 5's distinction between
1483             handlers and listeners: An event handler is the result of an HTML element
1484             beginning with 'on', e.g. onsubmit. These are also accessible via the DOM.
1485             (We also use the word 'handler' in other contexts, such as the 'default
1486             event handler'.)
1487             Event listeners are registered solely with the C method
1488             and can be removed with C.
1489              
1490             HTML::DOM accepts as an event handler a coderef, an object with a
1491             C method, or an object with C<&{}> overloading. If the
1492             C method is present, it is called with the current event
1493             target as the first argument and the event object as the second.
1494             This is to allow for objects that wrap JavaScript functions (which must be called with the event target as the B value).
1495              
1496             An event listener is a coderef, an object with a C
1497             method or an object with C<&{}> overloading. HTML::DOM does not implement
1498             any classes that provide a C method, but will support any
1499             object that has one.
1500              
1501             Listeners and handlers differ in one important aspect. A listener has to
1502             call C on the event object to cancel the default action. A
1503             handler simply returns a defined false value (except for mouseover events,
1504             which must return a true value to cancel the default).
1505              
1506             =head2 Default Actions
1507              
1508             Default actions that HTML::DOM is capable of handling internally (such as
1509             triggering a DOMActivate event when an element is clicked, and triggering a
1510             form's submit event when the submit button is activated) are dealt with
1511             automatically. You don't have to worry about those. For others, read
1512             on....
1513              
1514             To specify the default actions associated with an event, provide a
1515             subroutine (in this case, it not being part of the DOM, you can't use an
1516             object with a C method) via the C
1517             and
1518             C methods.
1519              
1520             With the former, you can specify the
1521             default action to be taken when a particular type of event occurs. The
1522             currently supported types are:
1523              
1524             submit when a form is submitted
1525             link called when a link is activated (DOMActivate event)
1526              
1527             Pass the type of event as the first argument and a code ref as the second
1528             argument. When the code ref is called, its sole argument will
1529             be the event object. For instance:
1530              
1531             $dom_tree->default_event_handler_for( link => sub {
1532             my $event = shift;
1533             go_to( $event->target->href );
1534             });
1535             sub go_to { ... }
1536              
1537             C with just one argument returns the
1538             currently
1539             assigned coderef. With two arguments it returns the old one after
1540             assigning the new one.
1541              
1542             Use C (without the C<_for>) to specify a fallback
1543             subroutine that will be used for events not in the list above, and for
1544             events in the list above that do not have subroutines assigned to them.
1545             Without any arguments it will return the currently
1546             assigned coderef. With an argument it will return the old one after
1547             assigning the new one.
1548              
1549             =head2 Dispatching Events
1550              
1551             HTML::DOM::Node's C method triggers the appropriate event
1552             listeners, but does B call any default actions associated with it.
1553             The return value is a boolean that indicates whether the default action
1554             should be taken.
1555              
1556             H:D:Node's C method will trigger the event for real. It will
1557             call C and, provided it returns true, will call the default
1558             event handler.
1559              
1560             =head2 HTML Event Attributes
1561              
1562             The C can be used to assign a coderef that will turn
1563             text assigned to an event attribute (e.g., C) into an event
1564             handler. The
1565             arguments to the routine will be (0) the element, (1) the name (aka
1566             type) of
1567             the event (without the initial 'on'), (2) the value of the attribute and
1568             (3) the offset within the source of the attribute's value. (Actually, if
1569             the value is within quotes, it is the offset of the first quotation mark.
1570             Also, it will be C for generated HTML [source code passed to the
1571             C method by an element handler].)
1572             As
1573             with C, you
1574             can replace an existing handler with a new one, in which case the old
1575             handler is returned. If you call this method without arguments, it returns
1576             the current handler. Here is an example of its use, that assumes that
1577             handlers are Perl code:
1578              
1579             $dom_tree->event_attr_handler(sub {
1580             my($elem, $name, $code, $offset) = @_;
1581             my $sub = eval "sub { $code }";
1582             return sub {
1583             local *_ = \$elem;
1584             &$sub;
1585             };
1586             });
1587              
1588             The event attribute handler will be called whenever an element attribute
1589             whose name
1590             begins with 'on' (case-tolerant) is modified. (For efficiency's sake, I may
1591             change it to call the event attribute handler only when the event is
1592             triggered, so it is not called unnecessarily.)
1593              
1594             =head2 When an Event Handler Dies
1595              
1596             Use C to assign a coderef that will be called whenever an
1597             event listener (or handler) raises an error. The error will be contained in
1598             C<$@>.
1599              
1600             =head2 Other Event-Related Methods
1601              
1602             =over
1603              
1604             =item $tree->event_parent
1605              
1606             =item $tree->event_parent( $new_val )
1607              
1608             This method lets you provide an object that is added to the top of the
1609             event dispatch chain. E.g., if you want the view object (the value of
1610             C, aka the window) to have event handlers called before the
1611             document in the capture phase, and after it in the bubbling phase, you can
1612             set it like this (see also L, above):
1613              
1614             $tree->event_parent( $tree->defaultView );
1615              
1616             This holds a weak reference.
1617              
1618             =item $tree->event_listeners_enabled
1619              
1620             =item $tree->event_listeners_enabled( $new_val )
1621              
1622             This attribute, which is true by default, can be used to disable event
1623             handlers and listeners. (Default event handlers [see above] still run,
1624             though.)
1625              
1626             =back
1627              
1628             =cut
1629              
1630              
1631             # ---------- NON-DOM EVENT METHODS -------------- #
1632              
1633             sub event_attr_handler {
1634 21     21 0 278 my $old = $_[0]->{_HTML_DOM_event_attr_handler};
1635 21 100       44 $_[0]->{_HTML_DOM_event_attr_handler} = $_[1] if @_ > 1;
1636 21         66 $old;
1637             }
1638             sub default_event_handler {
1639 1886     1886 0 1966 my $old = $_[0]->{_HTML_DOM_default_event_handler};
1640 1886 100       3286 $_[0]->{_HTML_DOM_default_event_handler} = $_[1] if @_ > 1;
1641 1886         3126 $old;
1642             }
1643             sub default_event_handler_for {
1644 222     222 0 293 my $old = $_[0]->{_HTML_DOM_dehf}{$_[1]};
1645 222 100       435 $_[0]->{_HTML_DOM_dehf}{$_[1]} = $_[2] if @_ > 2;
1646 222         709 $old;
1647             }
1648             sub error_handler {
1649 1895     1895 1 2214 my $old = $_[0]->{_HTML_DOM_error_handler};
1650 1895 100       3381 $_[0]->{_HTML_DOM_error_handler} = $_[1] if @_ > 1;
1651 1895         4588 $old;
1652             }
1653              
1654             sub event_parent {
1655 677     677 1 769 my $old = (my $self = shift) ->{_HTML_DOM_event_parent};
1656 677 100       1106 weaken($self->{_HTML_DOM_event_parent} = shift) if @_;
1657 677         1968 $old
1658             }
1659              
1660             sub event_listeners_enabled {
1661 1891     1891 1 2692 my $old = (my $Self = shift)->{_HTML_DOM_doevents};
1662 1891 100       3231 @_ and $$Self{_HTML_DOM_doevents} = !!shift;
1663 1891 100       4985 defined $old ? $old : 1; # true by default
1664             }
1665              
1666              
1667             # ---------- NODE AND SHEET LIST HELPER METHODS -------------- #
1668              
1669             sub _modified { # tells all it's magic nodelists that they're stale
1670             # and also rewrites the style sheet list if present
1671 376     376   576 my $list = $_[0]{_HTML_DOM_node_lists};
1672 376         380 my $list_is_stale;
1673 376         541 for (@$list) {
1674 276 100       591 defined() ? $_->_you_are_stale : ++$list_is_stale
1675             }
1676 376 100       533 if($list_is_stale) {
1677 48         134 @$list = grep defined, @$list;
1678 48         115 weaken $_ for @$list;
1679             }
1680            
1681 376         585 $_[0]->_populate_sheet_list
1682             }
1683              
1684             sub _populate_sheet_list { # called both by styleSheets and _modified
1685 413   100 413   414 for($_[0]->{_HTML_DOM_sheets}||return) {
1686 12         59 @$_ = map sheet $_,
1687             $_[0]->look_down(_tag => qr/^(?:link|style)\z/);
1688             }
1689             }
1690              
1691             sub _register_magic_node_list { # adds the node list to the list of magic
1692             # node lists that get notified automatic-
1693             # ally whenever the doc structure changes
1694 204     204   191 push @{$_[0]{_HTML_DOM_node_lists}}, $_[1];
  204         313  
1695 204         294 weaken $_[0]{_HTML_DOM_node_lists}[-1];
1696             }
1697              
1698              
1699              
1700             1;
1701             __END__