File Coverage

blib/lib/MojoMojo/Formatter/TOC.pm

Criterion	Covered	Total	%
statement	41	41	100.0
branch	8	14	57.1
condition	5	7	71.4
subroutine	9	9	100.0
pod	4	4	100.0
total	67	75	89.3

line	stmt	bran	cond	sub	pod	time	code
1							package MojoMojo::Formatter::TOC;
2
3	38			38		52308	use parent qw/MojoMojo::Formatter/;
	38					314
	38					209
4
5	38			38		2820	use HTML::Entities;
	38					9553
	38					1919
6	38			38		1110	use Encode;
	38					15570
	38					15494
7
8	38			38		15472	eval "use HTML::Toc;use HTML::TocInsertor;";
	38			38		82853
	38					1026
	38					17558
	38					338230
	38					717
9							my $eval_res = $@;
10
11							=head2 module_loaded
12
13							Return true if the module is loaded.
14
15							=cut
16
17	303	50		303	1	1491	sub module_loaded { $eval_res ? 0 : 1 }
18
19							=head1 NAME
20
21							MojoMojo::Formatter::TOC - generate table of contents
22
23							=head1 DESCRIPTION
24
25							This formatter will replace C<{{toc}}> with a table of contents, using
26							HTML::GenToc. If you don't want an element to be included in the TOC,
27							make it have C<class="notoc">
28
29							=head1 METHODS
30
31							=head2 format_content_order
32
33							The TOC formatter expects HTML input so it needs to run after the main
34							formatter. Since comment-type formatters (order 91) could add a heading
35							for the comment section, the TOC formatter will run with a priority of 95.
36
37							=cut
38
39	868			868	1	2653	sub format_content_order { 95 }
40
41							=head2 format_content
42
43							Calls the formatter. Takes a ref to the content as well as the context object.
44							The syntax for the TOC plugin invocation is:
45
46							{{toc M- }} # start from Header level M
47							{{toc -N }} # stop at Header level N
48							{{toc M-N }} # process only header levels M..N
49
50							where M is the minimum heading level to include in the TOC, and N is the
51							maximum level (depth). For example, suppose you only have one H1 on the page
52							so it doesn't make sense to add it to the TOC; also, assume you and don't want
53							to include any headers smaller than H3. The {{toc}} markup to achieve that would be:
54
55							{{toc 2-3}}
56
57							Defaults to 1-6.
58
59							=cut
60
61							sub format_content {
62	132			132	1	54176	my ( $class, $content ) = @_;
63	132	50				591	return unless $class->module_loaded;
64	132					609	my $toc_params_RE = qr/\s+ (\d+)? \s* - \s* (\d+)?/x;
65	132					1936	while (
66							# replace the {{toc ..}} markup tag and parse potential parameters
67							$$content =~ s[
68							\{\{ toc (?:$toc_params_RE)? \s* \/? }}
69							][<div class="toc">\n<!--mojomojoTOCwillgohere-->\n</div>]ix) {
70	12					33	my ($toc_h_min, $toc_h_max);
71	12		100			62	$toc_h_min = $1 \|\| 1;
72	12		100			54	$toc_h_max = $2 \|\| 9; # in practice, there are no more than 6 heading levels
73	12	50				46	$toc_h_min = 9 if $toc_h_min > 9; # prevent TocGenerator error for headings >= 10
74	12	50	33			60	$toc_h_max = 9 if $toc_h_max > 9 or $toc_h_max < $toc_h_min; # {{toc 3-1}} is wrong; make it {{toc 3-9}} instead
75
76	12					68	my $toc = HTML::Toc->new();
77	12					663	my $tocInsertor = HTML::TocInsertor->new();
78
79	12					1914	$toc->setOptions({
80							header => '', # by default, \n<!-- Table of Contents generated by Perl - HTML::Toc -->\n
81							footer => '',
82							insertionPoint => 'replace <!--mojomojoTOCwillgohere-->',
83							doLinkToId => 0,
84							levelToToc => "[$toc_h_min-$toc_h_max]",
85							templateAnchorName => \&assembleAnchorName,
86							});
87
88							# http://search.cpan.org/dist/HTML-Toc/Toc.pod#HTML::TocInsertor::insert()
89	12					261	$tocInsertor->insert($toc, $$content, {output => $content});
90
91	12					18101	return 1;
92							}
93							}
94
95							=head2 SEO-friendly anchors
96
97							Anchors should be generated with SEO- (and human-) friendly names, i.e. out of the entire
98							token text, instead of being numeric or reduced to the first word(s) of the token.
99							In the spirit of http://seo2.0.onreact.com/top-10-fatal-url-design-mistakes, compare:
100
101							http://beachfashion.com/photos/Pamela_Anderson#In_red_swimsuit_in_Baywatch
102							vs.
103							http://beachfashion.com/photos/Pamela_Anderson#in
104
105							"Which one speaks your language more, which one will you rather click?"
106
107							The anchor names generated are compliant with XHTML1.0 Strict. Also, per the
108							HTML 4.01 spec, anchor names should be restricted to ASCII characters and
109							anchors that differ only in case may not appear in the same document. In
110							particular, an anchor name may be defined only once in a document (logically,
111							because otherwise the user agent wouldn't know which #foo to scroll to).
112							This is currently a problem with L<HTML::Toc> v1.11, which doesn't have
113							support for passing the already existing anchors to the C<templateAnchorName>
114							sub.
115
116							=head2 assembleAnchorName
117
118							http://search.cpan.org/dist/HTML-Toc/Toc.pod#templateAnchorName
119
120							=cut
121
122							sub assembleAnchorName {
123	49			49	1	61192	my ($aFile, $aGroupId, $aLevel, $aNode, $text, $children) = @_;
124
125	49	50				246	if ($text !~ /^\s*$/) {
126							# generate a SEO-friendly anchor right from the token content
127							# The allowed character set is limited first by the URI specification for fragments, http://tools.ietf.org/html/rfc3986#section-2: characters
128							# then by the limitations of the values of 'id' and 'name' attributes: http://www.w3.org/TR/REC-html40/types.html#type-name
129							# Eventually, the only punctuation allowed in id values is [_.:-]
130							# Unicode characters with code points > 0x7E (e.g. Chinese characters) are allowed (test "<h1 id="????">header</h1>" at http://validator.w3.org/#validate_by_input+with_options), except for smart quotes (!), see http://www.w3.org/Search/Mail/Public/search?type-index=www-validator&index-type=t&keywords=[VE][122]+smart+quotes&search=Search+Mail+Archives
131							# However, that contradicts the HTML 4.01 spec: "Anchor names should be restricted to ASCII characters." - http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1
132							# ...and the [A-Za-z] class of letters mentioned at http://www.w3.org/TR/REC-html40/types.html#type-name
133							# Finally, note that pod2html fails miserably to generate XHTML-compliant anchor links. See http://validator.w3.org/check?uri=http%3A%2F%2Fsearch.cpan.org%2Fdist%2FCatalyst-Runtime%2Flib%2FCatalyst%2FRequest.pm&charset=(detect+automatically)&doctype=XHTML+1.0+Transitional&group=0&user-agent=W3C_Validator%2F1.606
134	49					217	$text =~ s/\s/_/g;
135	49					226	decode_entities($text); # we need to replace [#&;] only when they are NOT part of an HTML entity. decode_entities saves us from crafting a nasty regexp
136	49					158	$text = encode('utf-8', $text); # convert to UTF-8 because we need to output the UTF-8 bytes
137	49					2650	$text =~ s/([^A-Za-z0-9_:.-])/sprintf('.%02X', ord($1))/eg; # MediaWiki also uses the period, see http://en.wikipedia.org/wiki/Hierarchies#Ethics.2C_behavioral_psychology.2C_philosophies_of_identity
	69					203
138	49	100				163	$text = 'L'.$text if $text =~ /\A\W/; # "ID and NAME tokens must begin with a letter ([A-Za-z])" -- http://www.w3.org/TR/html4/types.html#type-name
139							}
140	49	50				142	$text = 'id' if $text eq '';
141
142							# check if the anchor already exists; if so, add a number
143							# NOTE: there is no way currently to do this easily in HTML-Toc-1.10.
144
145							#my $anch_num = 1;
146							#my $word_name = $name;
147							## Reference: http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1
148							## Anchor names must be unique within a document. Anchor names that differ only in case may not appear in the same document.
149							#while (grep {lc $_ eq lc $name} keys %{$args{anchors}}) {
150							# # FIXME (in caller sub): to avoid the grep above, the $args{anchors} hash
151							# # should have as key the lowercased anchor name, and as value its actual value (instead of '1')
152							# $name = $word_name . "_$anch_num";
153							# $anch_num++;
154							#}
155
156	49					136	return $text;
157							}
158
159
160							=head1 SEE ALSO
161
162							L<MojoMojo> and L<Module::Pluggable::Ordered>.
163
164							=head1 AUTHORS
165
166							Dan Dascalescu, L<http://dandascalescu.com>
167
168							=head1 LICENSE
169
170							This library is free software. You can redistribute it and/or modify
171							it under the same terms as Perl itself.
172
173							=cut
174
175							1;