| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package MojoMojo::Formatter::TOC; |
|
2
|
|
|
|
|
|
|
|
|
3
|
38
|
|
|
38
|
|
52308
|
use parent qw/MojoMojo::Formatter/; |
|
|
38
|
|
|
|
|
314
|
|
|
|
38
|
|
|
|
|
209
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
38
|
|
|
38
|
|
2820
|
use HTML::Entities; |
|
|
38
|
|
|
|
|
9553
|
|
|
|
38
|
|
|
|
|
1919
|
|
|
6
|
38
|
|
|
38
|
|
1110
|
use Encode; |
|
|
38
|
|
|
|
|
15570
|
|
|
|
38
|
|
|
|
|
15494
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
38
|
|
|
38
|
|
15472
|
eval "use HTML::Toc;use HTML::TocInsertor;"; |
|
|
38
|
|
|
38
|
|
82853
|
|
|
|
38
|
|
|
|
|
1026
|
|
|
|
38
|
|
|
|
|
17558
|
|
|
|
38
|
|
|
|
|
338230
|
|
|
|
38
|
|
|
|
|
717
|
|
|
9
|
|
|
|
|
|
|
my $eval_res = $@; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head2 module_loaded |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Returns true if the HTML::Toc and HTML::TocInsertor modules were loaded successfully. |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
|
16
|
|
|
|
|
|
|
|
|
17
|
303
|
50
|
|
303
|
1
|
1491
|
sub module_loaded { $eval_res ? 0 : 1 } |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 NAME |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
MojoMojo::Formatter::TOC - generate table of contents |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This formatter will replace C<{{toc}}> with a table of contents, using |
|
26
|
|
|
|
|
|
|
HTML::Toc and HTML::TocInsertor. If you don't want an element to be included in the TOC, |
|
27
|
|
|
|
|
|
|
make it have C<class="notoc"> |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 METHODS |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head2 format_content_order |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
The TOC formatter expects HTML input so it needs to run after the main |
|
34
|
|
|
|
|
|
|
formatter. Since comment-type formatters (order 91) could add a heading |
|
35
|
|
|
|
|
|
|
for the comment section, the TOC formatter will run with a priority of 95. |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=cut |
|
38
|
|
|
|
|
|
|
|
|
39
|
868
|
|
|
868
|
1
|
2653
|
sub format_content_order { 95 } |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head2 format_content |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Calls the formatter. Takes a ref to the content as well as the context object. |
|
44
|
|
|
|
|
|
|
The syntax for the TOC plugin invocation is: |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
{{toc M- }} # start from Header level M |
|
47
|
|
|
|
|
|
|
{{toc -N }} # stop at Header level N |
|
48
|
|
|
|
|
|
|
{{toc M-N }} # process only header levels M..N |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
where M is the minimum heading level to include in the TOC, and N is the |
|
51
|
|
|
|
|
|
|
maximum level (depth). For example, suppose you only have one H1 on the page |
|
52
|
|
|
|
|
|
|
so it doesn't make sense to add it to the TOC; also, assume you don't want |
|
53
|
|
|
|
|
|
|
to include any headers smaller than H3. The {{toc}} markup to achieve that would be: |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
{{toc 2-3}} |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Defaults to 1-9 (in practice, HTML has no more than 6 heading levels). |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub format_content { |
|
62
|
132
|
|
|
132
|
1
|
54176
|
my ( $class, $content ) = @_; |
|
63
|
132
|
50
|
|
|
|
591
|
return unless $class->module_loaded; |
|
64
|
132
|
|
|
|
|
609
|
my $toc_params_RE = qr/\s+ (\d+)? \s* - \s* (\d+)?/x; |
|
65
|
132
|
|
|
|
|
1936
|
while ( |
|
66
|
|
|
|
|
|
|
# replace the {{toc ..}} markup tag and parse potential parameters |
|
67
|
|
|
|
|
|
|
$$content =~ s[ |
|
68
|
|
|
|
|
|
|
\{\{ toc (?:$toc_params_RE)? \s* \/? }} |
|
69
|
|
|
|
|
|
|
][<div class="toc">\n<!--mojomojoTOCwillgohere-->\n</div>]ix) { |
|
70
|
12
|
|
|
|
|
33
|
my ($toc_h_min, $toc_h_max); |
|
71
|
12
|
|
100
|
|
|
62
|
$toc_h_min = $1 || 1; |
|
72
|
12
|
|
100
|
|
|
54
|
$toc_h_max = $2 || 9; # in practice, there are no more than 6 heading levels |
|
73
|
12
|
50
|
|
|
|
46
|
$toc_h_min = 9 if $toc_h_min > 9; # prevent TocGenerator error for headings >= 10 |
|
74
|
12
|
50
|
33
|
|
|
60
|
$toc_h_max = 9 if $toc_h_max > 9 or $toc_h_max < $toc_h_min; # {{toc 3-1}} is wrong; make it {{toc 3-9}} instead |
|
75
|
|
|
|
|
|
|
|
|
76
|
12
|
|
|
|
|
68
|
my $toc = HTML::Toc->new(); |
|
77
|
12
|
|
|
|
|
663
|
my $tocInsertor = HTML::TocInsertor->new(); |
|
78
|
|
|
|
|
|
|
|
|
79
|
12
|
|
|
|
|
1914
|
$toc->setOptions({ |
|
80
|
|
|
|
|
|
|
header => '', # by default, \n<!-- Table of Contents generated by Perl - HTML::Toc -->\n |
|
81
|
|
|
|
|
|
|
footer => '', |
|
82
|
|
|
|
|
|
|
insertionPoint => 'replace <!--mojomojoTOCwillgohere-->', |
|
83
|
|
|
|
|
|
|
doLinkToId => 0, |
|
84
|
|
|
|
|
|
|
levelToToc => "[$toc_h_min-$toc_h_max]", |
|
85
|
|
|
|
|
|
|
templateAnchorName => \&assembleAnchorName, |
|
86
|
|
|
|
|
|
|
}); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# http://search.cpan.org/dist/HTML-Toc/Toc.pod#HTML::TocInsertor::insert() |
|
89
|
12
|
|
|
|
|
261
|
$tocInsertor->insert($toc, $$content, {output => $content}); |
|
90
|
|
|
|
|
|
|
|
|
91
|
12
|
|
|
|
|
18101
|
return 1; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=head2 SEO-friendly anchors |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Anchors should be generated with SEO- (and human-) friendly names, i.e. out of the entire |
|
98
|
|
|
|
|
|
|
token text, instead of being numeric or reduced to the first word(s) of the token. |
|
99
|
|
|
|
|
|
|
In the spirit of http://seo2.0.onreact.com/top-10-fatal-url-design-mistakes, compare: |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
http://beachfashion.com/photos/Pamela_Anderson#In_red_swimsuit_in_Baywatch |
|
102
|
|
|
|
|
|
|
vs. |
|
103
|
|
|
|
|
|
|
http://beachfashion.com/photos/Pamela_Anderson#in |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
"Which one speaks your language more, which one will you rather click?" |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
The anchor names generated are compliant with XHTML1.0 Strict. Also, per the |
|
108
|
|
|
|
|
|
|
HTML 4.01 spec, anchor names should be restricted to ASCII characters and |
|
109
|
|
|
|
|
|
|
anchors that differ only in case may not appear in the same document. In |
|
110
|
|
|
|
|
|
|
particular, an anchor name may be defined only once in a document (logically, |
|
111
|
|
|
|
|
|
|
because otherwise the user agent wouldn't know which #foo to scroll to). |
|
112
|
|
|
|
|
|
|
This is currently a problem with L<HTML::Toc> v1.11, which doesn't have |
|
113
|
|
|
|
|
|
|
support for passing the already existing anchors to the C<templateAnchorName> |
|
114
|
|
|
|
|
|
|
sub. |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head2 assembleAnchorName |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
http://search.cpan.org/dist/HTML-Toc/Toc.pod#templateAnchorName |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub assembleAnchorName { |
|
123
|
49
|
|
|
49
|
1
|
61192
|
my ($aFile, $aGroupId, $aLevel, $aNode, $text, $children) = @_; |
|
124
|
|
|
|
|
|
|
|
|
125
|
49
|
50
|
|
|
|
246
|
if ($text !~ /^\s*$/) { |
|
126
|
|
|
|
|
|
|
# generate a SEO-friendly anchor right from the token content |
|
127
|
|
|
|
|
|
|
# The allowed character set is limited first by the URI specification for fragments, http://tools.ietf.org/html/rfc3986#section-2: characters |
|
128
|
|
|
|
|
|
|
# then by the limitations of the values of 'id' and 'name' attributes: http://www.w3.org/TR/REC-html40/types.html#type-name |
|
129
|
|
|
|
|
|
|
# Eventually, the only punctuation allowed in id values is [_.:-] |
|
130
|
|
|
|
|
|
|
# Unicode characters with code points > 0x7E (e.g. Chinese characters) are allowed (test "<h1 id="????">header</h1>" at http://validator.w3.org/#validate_by_input+with_options), except for smart quotes (!), see http://www.w3.org/Search/Mail/Public/search?type-index=www-validator&index-type=t&keywords=[VE][122]+smart+quotes&search=Search+Mail+Archives |
|
131
|
|
|
|
|
|
|
# However, that contradicts the HTML 4.01 spec: "Anchor names should be restricted to ASCII characters." - http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1 |
|
132
|
|
|
|
|
|
|
# ...and the [A-Za-z] class of letters mentioned at http://www.w3.org/TR/REC-html40/types.html#type-name |
|
133
|
|
|
|
|
|
|
# Finally, note that pod2html fails miserably to generate XHTML-compliant anchor links. See http://validator.w3.org/check?uri=http%3A%2F%2Fsearch.cpan.org%2Fdist%2FCatalyst-Runtime%2Flib%2FCatalyst%2FRequest.pm&charset=(detect+automatically)&doctype=XHTML+1.0+Transitional&group=0&user-agent=W3C_Validator%2F1.606 |
|
134
|
49
|
|
|
|
|
217
|
$text =~ s/\s/_/g; |
|
135
|
49
|
|
|
|
|
226
|
decode_entities($text); # we need to replace [#&;] only when they are NOT part of an HTML entity. decode_entities saves us from crafting a nasty regexp |
|
136
|
49
|
|
|
|
|
158
|
$text = encode('utf-8', $text); # convert to UTF-8 because we need to output the UTF-8 bytes |
|
137
|
49
|
|
|
|
|
2650
|
$text =~ s/([^A-Za-z0-9_:.-])/sprintf('.%02X', ord($1))/eg; # MediaWiki also uses the period, see http://en.wikipedia.org/wiki/Hierarchies#Ethics.2C_behavioral_psychology.2C_philosophies_of_identity |
|
|
69
|
|
|
|
|
203
|
|
|
138
|
49
|
100
|
|
|
|
163
|
$text = 'L'.$text if $text =~ /\A\W/; # "ID and NAME tokens must begin with a letter ([A-Za-z])" -- http://www.w3.org/TR/html4/types.html#type-name |
|
139
|
|
|
|
|
|
|
} |
|
140
|
49
|
50
|
|
|
|
142
|
$text = 'id' if $text eq ''; |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# check if the anchor already exists; if so, add a number |
|
143
|
|
|
|
|
|
|
# NOTE: there is no way currently to do this easily in HTML-Toc-1.10. |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
#my $anch_num = 1; |
|
146
|
|
|
|
|
|
|
#my $word_name = $name; |
|
147
|
|
|
|
|
|
|
## Reference: http://www.w3.org/TR/REC-html40/struct/links.html#h-12.2.1 |
|
148
|
|
|
|
|
|
|
## Anchor names must be unique within a document. Anchor names that differ only in case may not appear in the same document. |
|
149
|
|
|
|
|
|
|
#while (grep {lc $_ eq lc $name} keys %{$args{anchors}}) { |
|
150
|
|
|
|
|
|
|
# # FIXME (in caller sub): to avoid the grep above, the $args{anchors} hash |
|
151
|
|
|
|
|
|
|
# # should have as key the lowercased anchor name, and as value its actual value (instead of '1') |
|
152
|
|
|
|
|
|
|
# $name = $word_name . "_$anch_num"; |
|
153
|
|
|
|
|
|
|
# $anch_num++; |
|
154
|
|
|
|
|
|
|
#} |
|
155
|
|
|
|
|
|
|
|
|
156
|
49
|
|
|
|
|
136
|
return $text; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
L<MojoMojo> and L<Module::Pluggable::Ordered>. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 AUTHORS |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Dan Dascalescu, L<http://dandascalescu.com> |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head1 LICENSE |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
This library is free software. You can redistribute it and/or modify |
|
171
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; |