File Coverage

blib/lib/HTML/HTML5/Writer.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package HTML::HTML5::Writer;
2              
3 1     1   28409 use 5.010;
  1         4  
  1         47  
4 1     1   5 use base qw[Exporter];
  1         1  
  1         126  
5 1     1   6 use strict;
  1         6  
  1         38  
6 1     1   1169 use HTML::HTML5::Entities 0.001 qw[];
  1         19756  
  1         34  
7 1     1   572 use XML::LibXML qw[:all];
  0            
  0            
8              
# Ready-made <!DOCTYPE> declarations. The names are listed again in
# %EXPORT_TAGS{doctype} so that callers can import them. Several names
# are deliberate aliases that carry the same string.
use constant DOCTYPE_NIL => '';
use constant DOCTYPE_HTML32 => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">';
use constant DOCTYPE_HTML4 => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">';
use constant DOCTYPE_HTML5 => '<!DOCTYPE html>';
use constant DOCTYPE_LEGACY => '<!DOCTYPE html SYSTEM "about:legacy-compat">';
use constant DOCTYPE_XHTML1 => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
use constant DOCTYPE_XHTML11 => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">';
use constant DOCTYPE_XHTML_BASIC => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML Basic 1.1//EN" "http://www.w3.org/TR/xhtml-basic/xhtml-basic11.dtd">';
use constant DOCTYPE_XHTML_RDFA => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.1//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd">';
use constant DOCTYPE_HTML2 => '<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 2.0//EN">';
use constant DOCTYPE_HTML40 => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/1998/REC-html40-19980424/strict.dtd">';
use constant DOCTYPE_HTML40_STRICT => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/1998/REC-html40-19980424/strict.dtd">';
use constant DOCTYPE_HTML40_LOOSE => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/1998/REC-html40-19980424/loose.dtd">';
use constant DOCTYPE_HTML40_FRAMESET => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" "http://www.w3.org/TR/1998/REC-html40-19980424/frameset.dtd">';
use constant DOCTYPE_HTML401 => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">';
use constant DOCTYPE_HTML401_STRICT => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">';
use constant DOCTYPE_HTML401_LOOSE => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">';
use constant DOCTYPE_HTML401_FRAMESET => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">';
use constant DOCTYPE_XHTML1_STRICT => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
use constant DOCTYPE_XHTML1_LOOSE => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
use constant DOCTYPE_XHTML1_FRAMESET => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">';
use constant DOCTYPE_XHTML_MATHML_SVG => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">';
use constant DOCTYPE_XHTML_BASIC_10 => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML Basic 1.0//EN" "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd">';
use constant DOCTYPE_XHTML_BASIC_11 => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML Basic 1.1//EN" "http://www.w3.org/TR/xhtml-basic/xhtml-basic11.dtd">';
use constant DOCTYPE_HTML4_RDFA => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01+RDFa 1.1//EN" "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd">';
use constant DOCTYPE_HTML401_RDFA11 => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01+RDFa 1.1//EN" "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd">';
use constant DOCTYPE_HTML401_RDFA10 => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/html401-rdfa-1.dtd">';
use constant DOCTYPE_XHTML_RDFA10 => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">';
use constant DOCTYPE_XHTML_RDFA11 => '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.1//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd">';
40              
our $VERSION = '0.201';

# Every DOCTYPE_* constant name, in export order. They are grouped under
# the "doctype" tag and are also individually importable via @EXPORT_OK.
our %EXPORT_TAGS = (
    doctype => [
        map { "DOCTYPE_$_" } qw(
            NIL HTML32 HTML4 HTML5
            LEGACY XHTML1 XHTML11 XHTML_BASIC
            XHTML_RDFA HTML2 HTML40 HTML40_STRICT
            HTML40_LOOSE HTML40_FRAMESET HTML401
            HTML401_STRICT HTML401_LOOSE HTML401_FRAMESET
            XHTML1_STRICT XHTML1_LOOSE XHTML1_FRAMESET
            XHTML_MATHML_SVG XHTML_BASIC_10 XHTML_BASIC_11
            HTML4_RDFA HTML401_RDFA11 HTML401_RDFA10
            XHTML_RDFA10 XHTML_RDFA11
        )
    ],
);
our @EXPORT_OK = @{ $EXPORT_TAGS{'doctype'} };

# Elements that element() writes without an end tag when they have no children.
our @VoidElements = qw(
    area base br col command embed hr img
    input keygen link meta param source track wbr
);

# Attributes that attribute() may minimise to a bare name. An entry is
# either a plain attribute name or "element@attribute".
our @BooleanAttributes = qw(
    hidden
    audio@autoplay audio@preload audio@controls audio@loop
    button@autofocus button@disabled button@formnovalidate
    command@checked command@disabled
    details@open
    dl@compact
    fieldset@disabled
    form@novalidate
    hr@noshade
    iframe@seamless
    img@ismap
    input@autofocus input@checked input@disabled input@formnovalidate
    input@multiple input@readonly input@required
    keygen@autofocus keygen@disabled
    ol@reversed
    optgroup@disabled
    option@disabled option@selected
    script@async script@defer
    select@autofocus select@disabled select@multiple select@readonly
    select@required
    style@scoped
    textarea@autofocus textarea@disabled textarea@required
    time@pubdate
    track@default
    video@autoplay video@preload video@controls video@loop
);

# Elements for which element() consults a _check_omit_start_* or
# _check_omit_end_* method before writing the corresponding tag.
our @OptionalStart = qw(html head body tbody);
our @OptionalEnd   = qw(
    html head body tbody dt dd li optgroup
    option p rp rt td th tfoot thead tr
);
88              
# Constructor. Takes option => value pairs, blesses them as the object,
# and fills in a default for every core option that is left undefined.
sub new
{
    my ($class, %opts) = @_;
    my $self = bless \%opts, $class;

    # Simple fixed defaults. 'markup' must be in place before is_xhtml
    # is consulted below.
    my %fallback = (markup => 'html', charset => 'utf8', refs => 'hex');
    foreach my $option (qw(markup charset refs))
    {
        $self->{$option} //= $fallback{$option};
    }

    # Defaults that depend on the chosen markup flavour.
    unless (defined $self->{'doctype'})
    {
        $self->{'doctype'} = $self->is_xhtml ? DOCTYPE_LEGACY : DOCTYPE_HTML5;
    }
    unless (defined $self->{'polyglot'})
    {
        $self->{'polyglot'} = !!$self->is_xhtml;
    }

    return $self;
}
102              
# True when the 'markup' option names an XML flavour: a short name
# (xml, xhtml) or one of the XML media types, compared case-insensitively.
sub is_xhtml
{
    my $self   = shift;
    my $markup = $self->{'markup'};
    return ($markup =~ m'^(xml|xhtml|application/xml|text/xml|application/xhtml\+xml)$'i);
}
108              
# Accessor for the 'polyglot' option.
sub is_polyglot
{
    my $self = shift;
    return $self->{'polyglot'};
}
114              
# Whether attribute values must always be quoted. An explicit
# 'quote_attributes' option wins (even when false); otherwise quoting is
# forced for XHTML and polyglot output.
sub should_quote_attributes
{
    my $self = shift;
    return exists $self->{'quote_attributes'}
        ? $self->{'quote_attributes'}
        : ($self->is_xhtml || $self->is_polyglot);
}
121              
# Whether void elements are closed with ' />'. An explicit 'voids' option
# wins (even when false); otherwise true for XHTML and polyglot output.
sub should_slash_voids
{
    my $self = shift;
    return exists $self->{'voids'}
        ? $self->{'voids'}
        : ($self->is_xhtml || $self->is_polyglot);
}
128              
# Whether optional end tags must always be written. An explicit
# 'end_tags' option wins (even when false); otherwise true for XHTML and
# polyglot output.
sub should_force_end_tags
{
    my $self = shift;
    return exists $self->{'end_tags'}
        ? $self->{'end_tags'}
        : ($self->is_xhtml || $self->is_polyglot);
}
135              
# Whether optional start tags must always be written. An explicit
# 'start_tags' option wins (even when false); otherwise true for XHTML and
# polyglot output.
sub should_force_start_tags
{
    my $self = shift;
    return exists $self->{'start_tags'}
        ? $self->{'start_tags'}
        : ($self->is_xhtml || $self->is_polyglot);
}
142              
# Serialise a whole document: the writer's DOCTYPE string followed by the
# serialisation of each child node of $document, concatenated in order.
sub document
{
    my ($self, $document) = @_;
    my @top_level = $document->childNodes;

    my $prologue = $self->doctype;
    my @pieces   = map { $self->_element_etc($_) } @top_level;

    return $prologue . join('', @pieces);
}
150              
# Accessor for the 'doctype' option (the literal DOCTYPE string to emit).
sub doctype
{
    my $self = shift;
    return $self->{'doctype'};
}
156              
# Dispatch a node to the serialiser for its kind: text, comment and CDATA
# nodes are recognised by node name, processing instructions by class,
# and anything else is handled as an element.
sub _element_etc
{
    my ($self, $etc) = @_;
    my $kind = $etc->nodeName;

    return $self->text($etc)    if $kind eq '#text';
    return $self->comment($etc) if $kind eq '#comment';
    return $self->cdata($etc)   if $kind eq '#cdata-section';
    return $self->pi($etc)      if $etc->isa('XML::LibXML::PI');
    return $self->element($etc);
}
172              
# Serialise one element node and, recursively, its children.
#
# Elements outside the XHTML namespace are returned via the node's own
# toString. For XHTML elements the start tag, attributes (sorted by
# name), children and end tag are written, except that:
#   * the xmlns attribute is dropped from <html> in plain HTML output;
#   * a start or end tag listed in @OptionalStart / @OptionalEnd is left
#     out when the matching _check_omit_{start,end}_<tag> method allows it
#     and the writer is not forcing tags;
#   * a childless void element is closed immediately ('>' or ' />').
sub element
{
    my ($self, $element) = @_;

    return $element->toString
        unless $element->namespaceURI eq 'http://www.w3.org/1999/xhtml';

    my $tagname = $element->nodeName;
    my %attrs   = map { $_->nodeName => $_ } $element->attributes;
    my @kids    = $element->childNodes;

    if ($tagname eq 'html' && !$self->is_xhtml && !$self->is_polyglot)
    {
        delete $attrs{'xmlns'};
    }

    # Look the checker up by name instead of compiling a string with
    # eval. A missing checker, or one that throws, means "do not omit",
    # which is what the former string eval silently produced as well.
    my $may_omit = sub
    {
        my ($which) = @_;
        my $checker = $self->can("_check_omit_${which}_${tagname}")
            or return 0;
        return eval { $self->$checker($element) } ? 1 : 0;
    };

    my $omitstart = 0;
    if (!%attrs and !$self->should_force_start_tags and grep { $tagname eq $_ } @OptionalStart)
    {
        $omitstart = $may_omit->('start');
    }

    my $omitend = 0;
    if (!$self->should_force_end_tags and grep { $tagname eq $_ } @OptionalEnd)
    {
        $omitend = $may_omit->('end');
    }

    my $rv = '';
    unless ($omitstart)
    {
        $rv .= '<'.$tagname;
        foreach my $name (sort keys %attrs)
        {
            $rv .= ' '.$self->attribute($attrs{$name}, $element);
        }
    }

    # Childless void element: close the start tag and stop.
    if (!@kids and grep { $tagname eq $_ } @VoidElements and !$omitstart)
    {
        $rv .= $self->should_slash_voids ? ' />' : '>';
        return $rv;
    }

    $rv .= '>' unless $omitstart;

    foreach my $kid (@kids)
    {
        $rv .= $self->_element_etc($kid);
    }

    unless ($omitend)
    {
        $rv .= '</'.$tagname.'>';
    }

    return $rv;
}
231              
# Serialise one attribute node.
#
# $element is optional; when given, its node name is used to match
# "element@attribute" entries in @BooleanAttributes.
#
# Unless quoting is forced, a boolean attribute whose value is empty or
# equal to its own name is minimised to the bare name, and a value made
# only of letters, digits, '.', '_', ':' and '-' is written unquoted.
# Otherwise the value is entity-encoded and quoted, preferring double
# quotes and falling back to single quotes when the value contains a
# double quote but no single quote.
sub attribute
{
    my ($self, $attr, $element) = @_;

    my $attrname = $attr->nodeName;
    my $elemname = $element ? $element->nodeName : '*';
    my $value    = $attr->value;

    unless ($self->should_quote_attributes)
    {
        my $qualified = sprintf('%s@%s', $elemname, $attrname);
        if (($value eq $attrname or $value eq '')
            and grep { $_ eq $attrname or $_ eq $qualified } @BooleanAttributes)
        {
            return $attrname;
        }

        # \A...\z rather than ^...$: '$' also matches before a trailing
        # newline, which let such a value be written unquoted.
        if ($value =~ /\A[A-Za-z0-9\._:-]+\z/)
        {
            return sprintf('%s=%s', $attrname, $value);
        }
    }

    my $quotechar = '"';
    my @extra;
    if ($value !~ /\"/)
    {
        # No double quote in the value: wrap in double quotes as-is.
    }
    elsif ($value !~ /\'/)
    {
        $quotechar = "'";
    }
    else
    {
        # Both quote characters occur: keep double quotes and have the
        # double quotes inside the value entity-encoded.
        @extra = (characters => "\"");
    }
    my $encoded_value = $self->encode_entities($value, @extra);

    return sprintf('%s=%s%s%s', $attrname, $quotechar, $encoded_value, $quotechar);
}
277              
# Serialise a comment node: its node value is passed through
# encode_entities and wrapped in comment delimiters.
sub comment
{
    my ($self, $text) = @_;
    my $body = $self->encode_entities($text->nodeValue);
    return join '', '<!--', $body, '-->';
}
283              
# Serialise a processing instruction. A PI whose node name is 'decode'
# is replaced by its text content run through
# HTML::HTML5::Entities::decode; any other PI is returned via toString.
sub pi
{
    my ($self, $pi) = @_;
    return $pi->toString
        unless $pi->nodeName eq 'decode';
    return HTML::HTML5::Entities::decode($pi->textContent);
}
293              
# Serialise a CDATA section node.
#   * polyglot output inside script/style: raw value wrapped in a
#     comment-guarded CDATA section;
#   * non-XHTML output inside script/style: raw value;
#   * non-XHTML output elsewhere: handled like an ordinary text node;
#   * otherwise (XHTML): a literal CDATA section.
sub cdata
{
    my ($self, $text) = @_;

    # Evaluated lazily so the parent is only inspected on the paths that
    # need it.
    my $inside_raw_text = sub { $text->parentNode->nodeName =~ /^(script|style)$/i };

    return '/* <![CDATA[ */' . $text->nodeValue . '/* ]]> */'
        if $self->is_polyglot && $inside_raw_text->();

    if (!$self->is_xhtml)
    {
        return $inside_raw_text->() ? $text->nodeValue : $self->text($text);
    }

    return '<![CDATA[' . $text->nodeValue . ']]>';
}
314            
# Serialise a text node. Text whose parent is a script or style element
# is written raw: comment-guarded CDATA for polyglot output, bare for
# non-XHTML output, a plain CDATA section for XHTML. All other text is
# entity-encoded with '<' and '>' added to the special characters.
sub text
{
    my ($self, $text) = @_;
    my $inside_raw_text = sub { $text->parentNode->nodeName =~ /^(script|style)$/i };

    return '/* <![CDATA[ */' . $text->nodeValue . '/* ]]> */'
        if $self->is_polyglot && $inside_raw_text->();

    return $text->nodeValue
        if !$self->is_xhtml && $inside_raw_text->();

    return '<![CDATA[' . $text->nodeValue . ']]>'
        if $inside_raw_text->();

    return $self->encode_entities($text->nodeValue,
        characters => "<>");
}
333            
# Entity-encode $string. The set of characters that get encoded is built
# from: the caller's optional 'characters' string, '&', a fixed list of
# control characters, and, unless the 'charset' option matches utf8,
# everything from U+0080 upwards. The 'refs' option selects hexadecimal
# references unless it contains "dec".
sub encode_entities
{
    my ($self, $string, %options) = @_;

    my @class_parts = (
        $options{'characters'} // '',
        '&',
        '\x{0}-\x{8}\x{B}\x{C}\x{E}-\x{1F}\x{26}\x{7F}',
    );
    push @class_parts, '\x{80}-\x{FFFFFF}'
        if $self->{'charset'} !~ /^utf[_-]?8$/i;

    my $characters = join '', @class_parts;
    my $regexp     = qr/[$characters]/;

    local $HTML::HTML5::Entities::hex = ($self->{'refs'} !~ /dec/i);
    return HTML::HTML5::Entities::encode_entities($string, $regexp);
}
348              
# Entity-encode $char with a pattern that matches any character except
# newline, using hexadecimal references unless the 'refs' option
# contains "dec".
sub encode_entity
{
    my ($self, $char) = @_;
    my $use_hex = ($self->{'refs'} !~ /dec/i);

    local $HTML::HTML5::Entities::hex = $use_hex;
    return HTML::HTML5::Entities::encode_entities($char, qr/./);
}
356              
# Decide whether the </body> end tag may be left out.
#
# Returns 0 when the body is immediately followed by a comment.
# Otherwise returns 1 if the body has child nodes or its start tag
# cannot be omitted, and 0 if neither holds.
#
# Every path now returns explicitly. Previously, when the next sibling
# was a comment, the sub fell off the end of the `unless` and implicitly
# returned the (true) value of its condition, i.e. "omit".
sub _check_omit_end_body
{
    my ($self, $element) = @_;
    my $next = $element->nextSibling;

    return 0 if defined $next && $next->nodeName eq '#comment';
    return 1 if $element->childNodes || !$self->_check_omit_start_body($element);
    return 0;
}
366              
# Decide whether the </head> end tag may be left out: only when a next
# sibling exists and it is neither a comment nor a text node that starts
# with whitespace.
sub _check_omit_end_head
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;
    return 0 if !defined $follower;

    my $kind = $follower->nodeName;
    return 0 if $kind eq '#comment';
    return 0 if $kind eq '#text' && $follower->nodeValue =~ /^\s/;
    return 1;
}
376              
# Decide whether the </html> end tag may be left out.
#
# Looks at the last <body> child: if it has child nodes or attributes,
# the end tag may be omitted when <html> has no next sibling. Returns 0
# in every other case, including when <html> has no <body> child at all
# (that case used to die on an undefined value).
sub _check_omit_end_html
{
    my ($self, $element) = @_;

    my @bodies = $element->getChildrenByTagName('body');
    return 0 unless @bodies;

    my $body = $bodies[-1];
    if ($body->childNodes || $body->attributes)
    {
        return !defined $element->nextSibling;
    }
    return 0;
}
387              
# Decide whether a </dd> end tag may be left out: when there is no next
# sibling, or the next sibling is a dd or dt.
sub _check_omit_end_dd
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( dd | dt )$/x;
}

# dt follows exactly the same rule as dd.
*_check_omit_end_dt = \&_check_omit_end_dd;
398              
# Decide whether a </li> end tag may be left out: when there is no next
# sibling, or the next sibling is another li.
sub _check_omit_end_li
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( li )$/x;
}
407              
# Decide whether an </optgroup> end tag may be left out: when there is no
# next sibling, or the next sibling is another optgroup.
sub _check_omit_end_optgroup
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( optgroup )$/x;
}
416              
# Decide whether an </option> end tag may be left out: when there is no
# next sibling, or the next sibling is an option or optgroup.
sub _check_omit_end_option
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( option | optgroup )$/x;
}
425              
# Decide whether a </p> end tag may be left out: when there is no next
# sibling, or the next sibling is one of the block-level elements listed
# in the pattern below.
sub _check_omit_end_p
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName
        =~ /^( address | article | aside | blockquote | dir
            | div | dl | fieldset | footer | form | h[1-6]
            | header | hr | menu | nav | ol | p | pre | section
            | table | ul )$/x;
}
437              
# Decide whether an </rp> end tag may be left out: when there is no next
# sibling, or the next sibling is an rp or rt.
sub _check_omit_end_rp
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( rp | rt )$/x;
}

# rt follows exactly the same rule as rp.
*_check_omit_end_rt = \&_check_omit_end_rp;
448              
# Decide whether a </td> end tag may be left out: when there is no next
# sibling, or the next sibling is a td or th.
sub _check_omit_end_td
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( td | th )$/x;
}

# th follows exactly the same rule as td.
*_check_omit_end_th = \&_check_omit_end_td;
459              
# Decide whether a </tbody> end tag may be left out: when there is no
# next sibling, or the next sibling is a tbody or tfoot.
sub _check_omit_end_tbody
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( tbody | tfoot )$/x;
}
468              
# Decide whether a </tfoot> end tag may be left out: when there is no
# next sibling, or the next sibling is a tbody.
sub _check_omit_end_tfoot
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( tbody )$/x;
}
477              
# Decide whether a </thead> end tag may be left out: only when the next
# sibling is a tbody or tfoot. Unlike the sibling checks for other
# elements, a thead with no next sibling keeps its end tag (returns 0).
sub _check_omit_end_thead
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 0 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( tbody | tfoot )$/x;
}
486              
# Decide whether a </tr> end tag may be left out: when there is no next
# sibling, or the next sibling is another tr.
sub _check_omit_end_tr
{
    my ($self, $element) = @_;
    my $follower = $element->nextSibling;

    return 1 if !defined $follower;
    return 1 if $follower->nodeName =~ /^( tr )$/x;
}
495              
# Decide whether the <body> start tag may be left out: only when the body
# has a first child and that child is not a comment, not text starting
# with whitespace, and not a style or script element.
sub _check_omit_start_body
{
    my ($self, $element) = @_;
    my ($first) = $element->childNodes;
    return 0 unless defined $first;

    my $kind = $first->nodeName;
    return 0 if $kind eq '#comment';
    return 0 if $kind eq '#text' && $first->nodeValue =~ /^\s/;
    return 0 if $kind eq 'style';
    return 0 if $kind eq 'script';
    return 1;
}
508              
# Decide whether the <head> start tag may be left out: only when the head
# has children and the first one is an element node.
sub _check_omit_start_head
{
    my ($self, $element) = @_;
    my @children = $element->childNodes;
    return (@children and $children[0]->nodeType == XML_ELEMENT_NODE);
}
515              
# Decide whether the <html> start tag may be left out: only when the
# element has children and the first one is not a comment.
sub _check_omit_start_html
{
    my ($self, $element) = @_;
    my @children = $element->childNodes;
    return (@children and $children[0]->nodeName ne '#comment');
}
522              
# Decide whether a <tbody> start tag may be left out. The first child
# must be a tr. Given that, the tag may go when there is no previous
# sibling, or when the previous sibling is a tbody, thead or tfoot whose
# own end-tag check passes.
sub _check_omit_start_tbody
{
    my ($self, $element) = @_;

    my ($first) = $element->childNodes;
    return 0 unless defined $first;
    return 0 if $first->nodeName ne 'tr';

    my $before = $element->previousSibling;
    return 1 if !defined $before;

    return 1
        if $before->nodeName eq 'tbody'
        && $self->_check_omit_end_tbody($before);

    return 1
        if $before->nodeName eq 'thead'
        && $self->_check_omit_end_thead($before);

    return 1
        if $before->nodeName eq 'tfoot'
        && $self->_check_omit_end_tfoot($before);
}
544              
545             1;
546              
547             __END__
548              
549             =head1 NAME
550              
551             HTML::HTML5::Writer - output a DOM as HTML5
552              
553             =head1 SYNOPSIS
554              
555             use HTML::HTML5::Writer;
556            
557             my $writer = HTML::HTML5::Writer->new;
558             print $writer->document($dom);
559              
560             =head1 DESCRIPTION
561              
562             This module outputs XML::LibXML::Node objects as HTML5 strings.
563             It works well on DOM trees that represent valid HTML/XHTML
564             documents; less well on other DOM trees.
565              
566             =head2 Constructor
567              
568             =over 4
569              
570             =item C<< $writer = HTML::HTML5::Writer->new(%opts) >>
571              
572             Create a new writer object. Options include:
573              
574             =over 4
575              
576             =item * B<markup>
577              
578             Choose which serialisation of HTML5 to use: 'html' or 'xhtml'.
579              
580             =item * B<polyglot>
581              
582             Set to true in order to attempt to produce output which works as both
583             XML and HTML. Set to false to produce content that might not.
584              
585             If you don't explicitly set it, then it defaults to false for HTML, and
586             true for XHTML.
587              
588             =item * B<doctype>
589              
590             Set this to a string to choose which <!DOCTYPE> tag to output. Note, this
591             purely sets the <!DOCTYPE> tag and does not change how the rest of the
592             document is output. This really is just a plain string literal...
593              
594             # Yes, this works...
595             my $w = HTML::HTML5::Writer->new(doctype => '<!doctype html>');
596              
597             The following constants are provided for convenience:
598             B<DOCTYPE_HTML2>,
599             B<DOCTYPE_HTML32>,
600             B<DOCTYPE_HTML4> (latest stable strict HTML 4.x),
601             B<DOCTYPE_HTML4_RDFA> (latest stable HTML 4.x+RDFa),
602             B<DOCTYPE_HTML40> (strict),
603             B<DOCTYPE_HTML40_FRAMESET>,
604             B<DOCTYPE_HTML40_LOOSE>,
605             B<DOCTYPE_HTML40_STRICT>,
606             B<DOCTYPE_HTML401> (strict),
607             B<DOCTYPE_HTML401_FRAMESET>,
608             B<DOCTYPE_HTML401_LOOSE>,
609             B<DOCTYPE_HTML401_RDFA10>,
610             B<DOCTYPE_HTML401_RDFA11>,
611             B<DOCTYPE_HTML401_STRICT>,
612             B<DOCTYPE_HTML5>,
613             B<DOCTYPE_LEGACY> (about:legacy-compat),
614             B<DOCTYPE_NIL> (empty string),
615             B<DOCTYPE_XHTML1> (strict),
616             B<DOCTYPE_XHTML1_FRAMESET>,
617             B<DOCTYPE_XHTML1_LOOSE>,
618             B<DOCTYPE_XHTML1_STRICT>,
619             B<DOCTYPE_XHTML11>,
620             B<DOCTYPE_XHTML_BASIC>,
621             B<DOCTYPE_XHTML_BASIC_10>,
622             B<DOCTYPE_XHTML_BASIC_11>,
623             B<DOCTYPE_XHTML_MATHML_SVG>,
624             B<DOCTYPE_XHTML_RDFA> (latest stable strict XHTML+RDFa),
625             B<DOCTYPE_XHTML_RDFA10>,
626             B<DOCTYPE_XHTML_RDFA11>.
627              
628             Defaults to DOCTYPE_HTML5 for HTML and DOCTYPE_LEGACY for XHTML.
629              
630             =item * B<charset>
631              
632             This module always returns strings in Perl's internal utf8 encoding, but
633             you can set the 'charset' option to 'ascii' to create output that would
634             be suitable for re-encoding to ASCII (e.g. it will entity-encode characters
635             which do not exist in ASCII).
636              
637             =item * B<quote_attributes>
638              
639             Set this to true to force attributes to be quoted. If not explicitly
640             set, the writer will automatically detect when attributes need quoting.
641              
642             =item * B<voids>
643              
644             Set this to true to force void elements to always be terminated with '/>'.
645             If not explicitly set, they'll only be terminated that way in polyglot or
646             XHTML documents.
647              
648             =item * B<start_tags> and B<end_tags>
649              
650             Except in polyglot and XHTML documents, some elements allow their
651             start and/or end tags to be omitted in certain circumstances. By
652             setting these to true, you can prevent them from being omitted.
653              
654             =item * B<refs>
655              
656             Special characters that can't be encoded as named entities need
657             to be encoded as numeric character references instead. These
658             can be expressed in decimal or hexadecimal. Setting this option to
659             'dec' or 'hex' allows you to choose. The default is 'hex'.
660              
661             =back
662              
663             =back
664              
665             =head2 Public Methods
666              
667             =over 4
668              
669             =item C<< $writer->document($node) >>
670              
671             Outputs (i.e. returns a string that is) an XML::LibXML::Document as HTML.
672              
673             =item C<< $writer->element($node) >>
674              
675             Outputs an XML::LibXML::Element as HTML.
676              
677             =item C<< $writer->attribute($node) >>
678              
679             Outputs an XML::LibXML::Attr as HTML.
680              
681             =item C<< $writer->text($node) >>
682              
683             Outputs an XML::LibXML::Text as HTML.
684              
685             =item C<< $writer->cdata($node) >>
686              
687             Outputs an XML::LibXML::CDATASection as HTML.
688              
689             =item C<< $writer->comment($node) >>
690              
691             Outputs an XML::LibXML::Comment as HTML.
692              
693             =item C<< $writer->pi($node) >>
694              
695             Outputs an XML::LibXML::PI as HTML.
696              
697             =item C<< $writer->doctype >>
698              
699             Outputs the writer's DOCTYPE.
700              
701             =item C<< $writer->encode_entities($string, characters=>$more) >>
702              
703             Takes a string and returns the same string with some special characters
704             replaced. '&' is always treated as special; '<', '>' and '"' are not,
705             unless you provide them in a string of additional characters to treat as
706             special:
707              
708             $encoded = $writer->encode_entities($raw, characters=>'&<>"');
709              
710             =item C<< $writer->encode_entity($char) >>
711              
712             Returns $char entity-encoded. Encoding is done regardless of whether
713             $char is "special" or not.
714              
715             =item C<< $writer->is_xhtml >>
716              
717             Boolean indicating if $writer is configured to output XHTML.
718              
719             =item C<< $writer->is_polyglot >>
720              
721             Boolean indicating if $writer is configured to output polyglot HTML.
722              
723             =item C<< $writer->should_force_start_tags >>
724              
725             =item C<< $writer->should_force_end_tags >>
726              
727             Booleans indicating whether optional start and end tags should be forced.
728              
729             =item C<< $writer->should_quote_attributes >>
730              
731             Boolean indicating whether attributes need to be quoted.
732              
733             =item C<< $writer->should_slash_voids >>
734              
735             Boolean indicating whether void elements should be closed in the XHTML style.
736              
737             =back
738              
739             =head1 BUGS AND LIMITATIONS
740              
741             Certain DOM constructs cannot be output in non-XML HTML. e.g.
742              
743             my $xhtml = <<XHTML;
744             <html xmlns="http://www.w3.org/1999/xhtml">
745             <head><title>Test</title></head>
746             <body><hr>This text is within the HR element</hr></body>
747             </html>
748             XHTML
749             my $dom = XML::LibXML->new->parse_string($xhtml);
750             my $writer = HTML::HTML5::Writer->new(markup=>'html');
751             print $writer->document($dom);
752              
753             There's no way to serialise that properly in HTML. Right
754             now this module just outputs that HR element with text contained
755             within it, a la XHTML. In future versions, it may emit a warning
756             or throw an error.
757              
758             In these cases, the HTML::HTML5::{Parser,Writer} combination is
759             not round-trippable.
760              
761             Outputting elements and attributes in foreign (non-XHTML)
762             namespaces is implemented pretty naively and not thoroughly
763             tested. I'd be interested in any feedback people have, especially
764             on round-trippability of SVG, MathML and RDFa content in HTML.
765              
766             Please report any bugs to L<http://rt.cpan.org/>.
767              
768             =head1 SEE ALSO
769              
770             L<HTML::HTML5::Parser>,
771             L<HTML::HTML5::Builder>,
772             L<HTML::HTML5::ToText>,
773             L<XML::LibXML>.
774              
775             =head1 AUTHOR
776              
777             Toby Inkster E<lt>tobyink@cpan.orgE<gt>.
778              
779             =head1 COPYRIGHT AND LICENSE
780              
781             Copyright (C) 2010-2012 by Toby Inkster.
782              
783             This library is free software; you can redistribute it and/or modify
784             it under the same terms as Perl itself.
785              
786              
787             =cut