blib/lib/Pod/HtmlEasy/Parser.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 263 | 283 | 92.9 |
branch | 64 | 80 | 80.0 |
condition | 11 | 12 | 91.6 |
subroutine | 31 | 33 | 93.9 |
pod | 0 | 7 | 0.0 |
total | 369 | 415 | 88.9 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | ############################################################################# | ||||||
2 | ## Name: Parser.pm | ||||||
3 | ## Purpose: Pod::HtmlEasy::Parser | ||||||
4 | ## Author: Graciliano M. P. | ||||||
5 | ## Modified by: Geoffrey Leach | ||||||
6 | ## Created: 11/01/2004 | ||||||
7 | ## Updated: 2010-06-13 | ||||||
8 | ## Copyright: (c) 2004 Graciliano M. P. (c) 2007 - 2013 Geoffrey Leach | ||||||
9 | ## Licence: This program is free software; you can redistribute it and/or | ||||||
10 | ## modify it under the same terms as Perl itself | ||||||
11 | ############################################################################# | ||||||
12 | |||||||
13 | package Pod::HtmlEasy::Parser; | ||||||
14 | 4 | 4 | 70 | use 5.006003; | |||
4 | 12 | ||||||
4 | 172 | ||||||
15 | |||||||
16 | 4 | 4 | 21 | use base qw{ Pod::Parser }; | |||
4 | 7 | ||||||
4 | 536 | ||||||
17 | 4 | 4 | 21 | use Pod::Parser; | |||
4 | 5 | ||||||
4 | 225 | ||||||
18 | 4 | 4 | 4889 | use Pod::ParseLink; | |||
4 | 4013 | ||||||
4 | 266 | ||||||
19 | 4 | 4 | 1101 | use Readonly; | |||
4 | 3064 | ||||||
4 | 268 | ||||||
20 | 4 | 4 | 3112 | use Pod::HtmlEasy::Data qw(EMPTY NUL); | |||
4 | 15 | ||||||
4 | 373 | ||||||
21 | |||||||
22 | 4 | 4 | 34 | use Carp; | |||
4 | 7 | ||||||
4 | 275 | ||||||
23 | 4 | 4 | 20 | use English qw{ -no_match_vars }; | |||
4 | 8 | ||||||
4 | 27 | ||||||
24 | 4 | 4 | 16076 | use Regexp::Common qw{ whitespace number URI }; | |||
4 | 28016 | ||||||
4 | 28 | ||||||
25 | 4 | 4 | 198427 | use Regexp::Common::URI::RFC2396 qw { $escaped }; | |||
4 | 12 | ||||||
4 | 399 | ||||||
26 | 4 | 4 | 4842 | use Pod::Escapes qw{ e2char }; | |||
4 | 15872 | ||||||
4 | 781 | ||||||
27 | |||||||
28 | our $VERSION = version->declare("v1.1.11"); | ||||||
29 | |||||||
30 | # Provided for RT 82400. Use native switch if available. | ||||||
31 | BEGIN { | ||||||
32 | 4 | 50 | 4 | 230 | if ($PERL_VERSION >= 5.012) { | ||
33 | 4 | 40 | require feature; | ||||
34 | 4 | 522 | "feature"->import(qw(switch)); | ||||
35 | } else { | ||||||
36 | 0 | 0 | require Switch; | ||||
37 | 0 | 0 | "Switch"->import(qw(Perl6)); | ||||
38 | } | ||||||
39 | } | ||||||
40 | |||||||
41 | 4 | 4 | 25 | use strict; | |||
4 | 10 | ||||||
4 | 142 | ||||||
42 | 4 | 4 | 20 | use warnings; | |||
4 | 7 | ||||||
4 | 5310 | ||||||
43 | |||||||
44 | ######## | ||||||
45 | # VARS # | ||||||
46 | ######## | ||||||
47 | |||||||
48 | Readonly::Scalar my $NUL => NUL; | ||||||
49 | |||||||
50 | # RT 58274 [\w-]+ => [\w\.-] | ||||||
51 | # Commented patterns temp test | ||||||
52 | Readonly::Scalar my $MAIL_RE => qr{ | ||||||
53 | ( # grab all of this | ||||||
54 | [\w\.-]+ # some word chars with '-' and '.'included foo | ||||||
55 | \0? # possible NUL escape | ||||||
56 | \@ # literal '@' @ | ||||||
57 | [\w\.-]+ # another word bar | ||||||
58 | (?: # non-grabbing pattern | ||||||
59 | # \. # literal '.' . | ||||||
60 | [\w\.-]+ # that word stuff stuff | ||||||
61 | # \. # another literal '.' . | ||||||
62 | [\w\.-]+ # another word and | ||||||
63 | | # or | ||||||
64 | # \. # literal '.' . | ||||||
65 | [\w\.-]+ # word nonsense | ||||||
66 | | # or empty? | ||||||
67 | ) # end of non-grab | ||||||
68 | ) # end of grab | ||||||
69 | }smx; # [6062] | ||||||
70 | |||||||
71 | |||||||
72 | |||||||
73 | # Treatment of embedded HTML-significant characters and embedded URIs. | ||||||
74 | |||||||
75 | # There are some characters (%HTML_ENTITIES below) which may in some | ||||||
76 | # circumstances be interpreted by a browser, and you probably don't want that | ||||||
77 | # Consequently, they are replaced by names defined by the W3C UNICODE spec, | ||||||
78 | # http://www.w3.org/TR/MathML2/bycodes.html, bracketed by '&' and ';' | ||||||
79 | # Thus, '>' becomes '<' This is handled by _encode_entities() | ||||||
80 | # There's a "gotchya" in this process. As we are generating HTML, | ||||||
81 | # the encoding needs to take place _before_ any HTML is generated. | ||||||
82 | |||||||
83 | # If the HTML appears garbled, and UNICODE entities appear where they | ||||||
84 | # shouldn't, this encoding has happened to late at some point. | ||||||
85 | |||||||
86 | # This is all further complicated by the fact that the POD formatting | ||||||
87 | # codes syntax uses some of the same characters, as in "L<...>", for example, | ||||||
88 | # and we can't expand those first, because some of them generate | ||||||
89 | # HTML. This is resolved by tagging the characters that we want | ||||||
90 | # to distinguish from HTML with ASCII NUL ('\0', $NUL). Thus, '$lt;' becomes | ||||||
91 | # '\0&' in _encode_entities(). Generated HTML is also handled | ||||||
92 | # this way by _nul_escape(). After all processing of the POD formatting | ||||||
93 | # codes are processed, this is reversed by _remove _nul_escapes(). | ||||||
94 | |||||||
95 | # Then there's the issue of embedded URIs. URIs are also generated | ||||||
96 | # by the processing of L<...>, and can show up _inside L<...>, we | ||||||
97 | # delay processing of embedded URIs until after all of the POD | ||||||
98 | # formatting codes is complete. URIs that result from that processing | ||||||
99 | # are tagged (you guessed it!) with a NUL character, but not preceeding | ||||||
100 | # the generated URI, but after the first character. These NULs are removed | ||||||
101 | # by _remove _nul_escapes() | ||||||
102 | |||||||
103 | Readonly::Hash my %HTML_ENTITIES => ( | ||||||
104 | q{&} => q{amp}, | ||||||
105 | q{>} => q{gt}, | ||||||
106 | q{<} => q{lt}, | ||||||
107 | q{"} => q{quot}, | ||||||
108 | ); | ||||||
109 | |||||||
110 | my $HTML_ENTITIES_RE = join q{|}, keys %HTML_ENTITIES; | ||||||
111 | $HTML_ENTITIES_RE = qr{$HTML_ENTITIES_RE}msx; | ||||||
112 | |||||||
113 | ################# | ||||||
114 | # _NUL_ESCAPE # | ||||||
115 | ################# | ||||||
116 | |||||||
117 | # Escape HTML-significant characters with ASCII NUL to differentiate them | ||||||
118 | # from the same characters that get converted to entity names | ||||||
119 | sub _nul_escape { | ||||||
120 | 89 | 89 | 115 | my $txt_ref = shift; | |||
121 | |||||||
122 | 89 | 98 | ${$txt_ref} =~ s{($HTML_ENTITIES_RE)}{$NUL$1}gsmx; | ||||
89 | 1554 | ||||||
123 | 89 | 177 | return; | ||||
124 | } | ||||||
125 | |||||||
126 | ####################### | ||||||
127 | # _REMOVE_NUL_ESCAPSE # | ||||||
128 | ####################### | ||||||
129 | |||||||
130 | sub _remove_nul_escapes { | ||||||
131 | 496 | 496 | 594 | my $txt_ref = shift; | |||
132 | |||||||
133 | 496 | 481 | ${$txt_ref} =~ s{$NUL}{}gsmx; | ||||
496 | 1596 | ||||||
134 | 496 | 847 | return; | ||||
135 | } | ||||||
136 | |||||||
137 | #################### | ||||||
138 | # _ENCODE_ENTITIES # | ||||||
139 | #################### | ||||||
140 | |||||||
141 | sub _encode_entities { | ||||||
142 | 638 | 638 | 821 | my $txt_ref = shift; | |||
143 | |||||||
144 | 638 | 100 | 66 | 1555 | if ( !( defined $txt_ref && length ${$txt_ref} ) ) { return; } | ||
638 | 2611 | ||||||
259 | 680 | ||||||
145 | |||||||
146 | 379 | 2126 | foreach my $chr ( keys %HTML_ENTITIES ) { | ||||
147 | |||||||
148 | # $chr gets a lookbehind to avoid converting flagged from E<...> | ||||||
149 | 1516 | 9391 | my $re = qq{(? | ||||
150 | 1516 | 1464 | ${$txt_ref} =~ s{$re}{$NUL&$HTML_ENTITIES{$chr};}gsmx; | ||||
1516 | 21312 | ||||||
151 | } | ||||||
152 | |||||||
153 | 379 | 1191 | return; | ||||
154 | } | ||||||
155 | |||||||
156 | ################# | ||||||
157 | # _ADD_URI_HREF # | ||||||
158 | ################# | ||||||
159 | |||||||
160 | # process embedded URIs that are not noted in L<...> bracketing | ||||||
161 | # Note that the HTML-significant characters are escaped; | ||||||
162 | # The escapes are removed by _encode_entities | ||||||
163 | # Note that there's no presumption that there's a URI in the | ||||||
164 | # text, so not matching is _not_ and error. | ||||||
165 | |||||||
166 | sub _add_uri_href { | ||||||
167 | 348 | 348 | 457 | my ($txt_ref) = @_; | |||
168 | |||||||
169 | 348 | 100 | 367 | if ( ${$txt_ref} =~ m{https?:}smx ) { | |||
348 | 908 | ||||||
170 | |||||||
171 | # Replace escaped characters in URL with their ASCII equivalents | ||||||
172 | # Regexp::Common escapes in path part, but not in host part, which appears correct | ||||||
173 | # per the RFC. However, the Spamassassin folks use it in the host. | ||||||
174 | # $escaped is defined by Regexp::Common::URI::RFC2396, and matches %xx | ||||||
175 | # This is done first because if needed, the host part won't be parsed correctly | ||||||
176 | 12 | 17 | while ( ${$txt_ref} =~ m{($escaped)}msx ) { | ||||
13 | 130 | ||||||
177 | 1 | 4 | my $esc = $1; | ||||
178 | 1 | 2 | my $new = $1; | ||||
179 | 1 | 4 | $new =~ s{%}{0x}msx; | ||||
180 | 1 | 7 | $new = e2char($new); | ||||
181 | 1 | 22 | ${$txt_ref} =~ s{$esc}{$new}gmsx; | ||||
1 | 70 | ||||||
182 | } | ||||||
183 | |||||||
184 | # target='_blank' causes load to a new window or tab | ||||||
185 | # See HTML 4.01 spec, section 6.16 Frame target names | ||||||
186 | # Doing this because URI RE grabs non-word trailing characters | ||||||
187 | # ${$txt_ref} =~ m{$RE{URI}{HTTP}{-keep}{-scheme=>'https?'}}mx; | ||||||
188 | # my $uri = $1; | ||||||
189 | # my $host = $3; | ||||||
190 | # $uri =~ s{[^/\w]+\z}{}mx; | ||||||
191 | # ${$txt_ref} =~ s{$uri}{$host}mx; | ||||||
192 | 12 | 17 | ${$txt_ref} | ||||
12 | 64 | ||||||
193 | =~ s{$RE{URI}{HTTP}{-keep}{-scheme=>'https?'}}{$3}gsmx; | ||||||
194 | |||||||
195 | 12 | 3419 | return; | ||||
196 | } | ||||||
197 | |||||||
198 | 336 | 100 | 362 | if ( ${$txt_ref} =~ m{ftp:}smx ) { | |||
336 | 12682 | ||||||
199 | 2 | 4 | ${$txt_ref} =~ s{$RE{URI}{FTP}{-keep}}{$5}gsmx; | ||||
2 | 12 | ||||||
200 | 2 | 892 | return; | ||||
201 | } | ||||||
202 | |||||||
203 | 334 | 100 | 414 | if ( ${$txt_ref} =~ m{file:}smx ) { | |||
334 | 747 | ||||||
204 | 2 | 4 | ${$txt_ref} =~ s{$RE{URI}{file}{-keep}}{$3}gsmx; | ||||
2 | 12 | ||||||
205 | 2 | 506 | return; | ||||
206 | } | ||||||
207 | |||||||
208 | 332 | 100 | 426 | if ( ${$txt_ref} =~ m{$MAIL_RE}smx ) { | |||
332 | 1356 | ||||||
209 | 4 | 9 | ${$txt_ref} =~ s{mailto://}{}smx; | ||||
4 | 13 | ||||||
210 | 4 | 5 | ${$txt_ref} =~ s{($MAIL_RE)}{$1}gsmx; | ||||
4 | 276 | ||||||
211 | 4 | 11 | return; | ||||
212 | } | ||||||
213 | |||||||
214 | 328 | 516 | return; | ||||
215 | } | ||||||
216 | |||||||
217 | ########### | ||||||
218 | # COMMAND # | ||||||
219 | ########### | ||||||
220 | |||||||
221 | # Index levels, which translate into indentation in the index | ||||||
222 | Readonly::Scalar my $LEVEL1 => 1; | ||||||
223 | Readonly::Scalar my $LEVEL2 => 2; | ||||||
224 | Readonly::Scalar my $LEVEL3 => 3; | ||||||
225 | Readonly::Scalar my $LEVEL4 => 4; | ||||||
226 | Readonly::Scalar my $LEVELL => 0; | ||||||
227 | |||||||
228 | # Overrides command() provided by base class in Pod::Parser | ||||||
229 | sub command { | ||||||
230 | 129 | 129 | 0 | 258 | my ( $parser, $command, $paragraph, $line_num, $pod ) = @_; | ||
231 | |||||||
232 | 129 | 100 | 367 | if ( defined $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} ) { | |||
233 | 89 | 199 | _verbatim($parser); | ||||
234 | } # [6062] | ||||||
235 | |||||||
236 | 129 | 9244 | my $expansion = $parser->interpolate( $paragraph, $line_num ); | ||||
237 | |||||||
238 | 129 | 897 | $expansion =~ s{$RE{ws}{crop}}{}gsmx; # delete surrounding whitespace | ||||
239 | |||||||
240 | # Encoding puts in a NUL; we're finished with the text, so remove them | ||||||
241 | 129 | 25664 | _encode_entities( \$expansion ); | ||||
242 | 129 | 284 | _remove_nul_escapes( \$expansion ); | ||||
243 | |||||||
244 | 129 | 167 | my $html; | ||||
245 | 4 | 4 | 31 | no warnings; # 'experimental' | |||
4 | 7 | ||||||
4 | 3090 | ||||||
246 | 129 | 187 | given ($command) { | ||||
247 | 129 | 412 | when (q{head1}) { | ||||
248 | 19 | 62 | _add_index( $parser, $expansion, $LEVEL1 ); | ||||
249 | 19 | 106 | $html = $parser->{POD_HTMLEASY} | ||||
250 | ->{ON_HEAD1}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
251 | } | ||||||
252 | 110 | 159 | when (q{head2}) { | ||||
253 | 11 | 34 | _add_index( $parser, $expansion, $LEVEL2 ); | ||||
254 | 11 | 65 | $html = $parser->{POD_HTMLEASY} | ||||
255 | ->{ON_HEAD2}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
256 | } | ||||||
257 | 99 | 139 | when (q{head3}) { | ||||
258 | 3 | 11 | _add_index( $parser, $expansion, $LEVEL3 ); | ||||
259 | 3 | 21 | $html = $parser->{POD_HTMLEASY} | ||||
260 | ->{ON_HEAD3}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
261 | } | ||||||
262 | 96 | 132 | when (q{head4}) { | ||||
263 | 3 | 10 | _add_index( $parser, $expansion, $LEVEL4 ); | ||||
264 | 3 | 20 | $html = $parser->{POD_HTMLEASY} | ||||
265 | ->{ON_HEAD4}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
266 | } | ||||||
267 | 93 | 117 | when (q{begin}) { | ||||
268 | 2 | 24 | $html = $parser->{POD_HTMLEASY} | ||||
269 | ->{ON_BEGIN}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
270 | } | ||||||
271 | 91 | 129 | when (q{end}) { | ||||
272 | 2 | 26 | $html = $parser->{POD_HTMLEASY} | ||||
273 | ->{ON_END}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
274 | } | ||||||
275 | 89 | 127 | when (q{over}) { | ||||
276 | 5 | 29 | $html = $parser->{POD_HTMLEASY} | ||||
277 | ->{ON_OVER}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
278 | } | ||||||
279 | 84 | 134 | when (q{item}) { | ||||
280 | |||||||
281 | # Items that begin with '* ' are ugly. Is it there for pod2man? | ||||||
282 | # Which is not the same as _only_ '*' | ||||||
283 | 33 | 181 | $expansion =~ s{\A\*\s+}{}msx; | ||||
284 | |||||||
285 | 33 | 100 | 156 | if ( $parser->{INDEX_ITEM} ) { | |||
286 | 16 | 53 | _add_index( $parser, $expansion, $LEVELL ); | ||||
287 | } | ||||||
288 | |||||||
289 | # This is for the folks who use =item to list URLs | ||||||
290 | 33 | 50 | 107 | if ( $expansion !~ m{ | |||
291 | |||||||
292 | # The URI's not already encoded (L<...> is already processed) | ||||||
293 | 33 | 80 | _add_uri_href( \$expansion ); | ||||
294 | } | ||||||
295 | 33 | 163 | $html = $parser->{POD_HTMLEASY} | ||||
296 | ->{ON_ITEM}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
297 | } | ||||||
298 | 51 | 87 | when (q{back}) { | ||||
299 | 5 | 30 | $html = $parser->{POD_HTMLEASY} | ||||
300 | ->{ON_BACK}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
301 | } | ||||||
302 | 46 | 74 | when (q{for}) { | ||||
303 | 1 | 6 | $html = $parser->{POD_HTMLEASY} | ||||
304 | ->{ON_FOR}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
305 | } | ||||||
306 | 45 | 61 | default { | ||||
307 | 45 | 50 | 566 | if ( defined $parser->{POD_HTMLEASY}->{qq{ON_\U$command\E}} ) { | |||
50 | |||||||
308 | 0 | 0 | $html | ||||
309 | = $parser->{POD_HTMLEASY} | ||||||
310 | ->{qq{ON_\U$command\E}}( $parser->{POD_HTMLEASY}, | ||||||
311 | $expansion ); | ||||||
312 | } | ||||||
313 | elsif ( $command !~ /^(?:pod|cut)$/imsx ) { | ||||||
314 | 0 | 0 | $html = qq{=$command $expansion}; |
||||
315 | } | ||||||
316 | 45 | 133 | else { $html = EMPTY; } | ||||
317 | } | ||||||
318 | }; | ||||||
319 | 4 | 4 | 35 | use warnings; | |||
4 | 8 | ||||||
4 | 3255 | ||||||
320 | |||||||
321 | 129 | 100 | 360 | if ( $html ne EMPTY ) { | |||
322 | 73 | 92 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | ||||
73 | 211 | ||||||
323 | } | ||||||
324 | |||||||
325 | 129 | 5567 | return; | ||||
326 | } | ||||||
327 | |||||||
328 | ############ | ||||||
329 | # VERBATIM # | ||||||
330 | ############ | ||||||
331 | |||||||
332 | # Overrides verbatim() provided by base class in Pod::Parser | ||||||
333 | sub verbatim { | ||||||
334 | 12 | 12 | 0 | 21 | my ( $parser, $paragraph, $line_num ) = @_; | ||
335 | |||||||
336 | 12 | 50 | 36 | if ( exists $parser->{POD_HTMLEASY}->{IN_BEGIN} ) { return; } | |||
0 | 0 | ||||||
337 | 12 | 23 | $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} .= $paragraph; | ||||
338 | |||||||
339 | 12 | 260 | return; | ||||
340 | } | ||||||
341 | |||||||
342 | sub _verbatim { | ||||||
343 | 209 | 209 | 294 | my ($parser) = @_; | |||
344 | |||||||
345 | 209 | 100 | 619 | if ( exists $parser->{POD_HTMLEASY}->{IN_BEGIN} ) { return; } | |||
1 | 3 | ||||||
346 | 208 | 380 | my $expansion = $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER}; | ||||
347 | 208 | 514 | $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} = EMPTY; | ||||
348 | |||||||
349 | 208 | 447 | _encode_entities( \$expansion ); | ||||
350 | |||||||
351 | # If we had "=item *", we should now be looking at the text that will | ||||||
352 | # appear as the item. The "*" was passed over initially, so we need | ||||||
353 | # the text to index. Save the flag as ON_VERBATIM deletes IN_ITEM | ||||||
354 | |||||||
355 | 208 | 100 | 723 | my $add_index = $parser->{INDEX_ITEM} && $parser->{POD_HTMLEASY}{IN_ITEM}; | |||
356 | |||||||
357 | 208 | 840 | my $html = $parser->{POD_HTMLEASY} | ||||
358 | ->{ON_VERBATIM}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
359 | |||||||
360 | # Now look for any embedded URIs | ||||||
361 | 208 | 445 | _add_uri_href( \$html ); | ||||
362 | |||||||
363 | # And remove any NUL escapes | ||||||
364 | 208 | 448 | _remove_nul_escapes( \$html ); | ||||
365 | |||||||
366 | 208 | 100 | 515 | if ( $html ne EMPTY ) { | |||
367 | 4 | 50 | 12 | if ($add_index) { _add_index( $parser, $expansion, $LEVELL ); } | |||
0 | 0 | ||||||
368 | 4 | 7 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | ||||
4 | 14 | ||||||
369 | } # [6062] | ||||||
370 | |||||||
371 | 208 | 391 | return; | ||||
372 | } | ||||||
373 | |||||||
374 | ############# | ||||||
375 | # TEXTBLOCK # | ||||||
376 | ############# | ||||||
377 | |||||||
378 | # Overrides textblock() provided by base class in Pod::Parser | ||||||
379 | sub textblock { | ||||||
380 | 108 | 108 | 0 | 271 | my ( $parser, $paragraph, $line_num ) = @_; | ||
381 | |||||||
382 | 108 | 100 | 539 | if ( exists $parser->{POD_HTMLEASY}->{IN_BEGIN} ) { return; } | |||
1 | 26 | ||||||
383 | 107 | 100 | 294 | if ( defined $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} ) { | |||
384 | 94 | 179 | _verbatim($parser); | ||||
385 | } # [6062] | ||||||
386 | |||||||
387 | 107 | 10538 | my $expansion = $parser->interpolate( $paragraph, $line_num ); | ||||
388 | |||||||
389 | 107 | 647 | $expansion =~ s{$RE{ws}{crop}}{}gsmx; # delete surrounding whitespace | ||||
390 | 107 | 20917 | $expansion =~ s{\s+$}{}gsmx; | ||||
391 | |||||||
392 | # Encode HTML-specific characters before adding any HTML (eg ) |
||||||
393 | 107 | 238 | _encode_entities( \$expansion ); | ||||
394 | |||||||
395 | # If we had "=item *", we should now be looking at the text that will | ||||||
396 | # appear as the item. The "*" was passed over initially, so we need | ||||||
397 | # the text to index. Save the flag as ON_TEXTBLOCK deletes IN_ITEM | ||||||
398 | |||||||
399 | 107 | 100 | 358 | my $add_index = $parser->{INDEX_ITEM} && $parser->{POD_HTMLEASY}{IN_ITEM}; | |||
400 | |||||||
401 | 107 | 499 | my $html = $parser->{POD_HTMLEASY} | ||||
402 | ->{ON_TEXTBLOCK}( $parser->{POD_HTMLEASY}, $expansion ); | ||||||
403 | |||||||
404 | # Now look for any embedded URIs | ||||||
405 | 107 | 254 | _add_uri_href( \$html ); | ||||
406 | |||||||
407 | # And remove any NUL escapes | ||||||
408 | 107 | 781 | _remove_nul_escapes( \$html ); | ||||
409 | |||||||
410 | 107 | 50 | 347 | if ( $html ne EMPTY ) { | |||
411 | 107 | 100 | 210 | if ($add_index) { _add_index( $parser, $expansion, $LEVELL ); } | |||
4 | 14 | ||||||
412 | 107 | 112 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | ||||
107 | 324 | ||||||
413 | } | ||||||
414 | |||||||
415 | 107 | 3990 | return; | ||||
416 | } | ||||||
417 | |||||||
418 | ##################### | ||||||
419 | # INTERIOR_SEQUENCE # | ||||||
420 | ##################### | ||||||
421 | |||||||
422 | # Overrides interior_sequence() provided by base class in Pod::Parser | ||||||
423 | sub interior_sequence { | ||||||
424 | 89 | 89 | 0 | 255 | my ( $parser, $seq_command, $seq_argument, $pod_seq ) = @_; | ||
425 | |||||||
426 | 89 | 95 | my $ret; | ||||
427 | |||||||
428 | # Encode HTML-specific characters before adding any HTML (eg ) |
||||||
429 | 89 | 100 | 215 | if ( $seq_command ne q{L} ) { | |||
430 | 47 | 235 | _encode_entities( \$seq_argument ); | ||||
431 | } | ||||||
432 | |||||||
433 | 4 | 4 | 27 | no warnings; # 'experimental' | |||
4 | 8 | ||||||
4 | 2098 | ||||||
434 | 89 | 135 | given ($seq_command) { | ||||
435 | 89 | 464 | when (q{B}) { | ||||
436 | 3 | 24 | $ret = $parser->{POD_HTMLEASY} | ||||
437 | ->{ON_B}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
438 | } | ||||||
439 | 86 | 119 | when (q{C}) { | ||||
440 | 18 | 81 | $ret = $parser->{POD_HTMLEASY} | ||||
441 | ->{ON_C}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
442 | } | ||||||
443 | 68 | 92 | when (q{E}) { | ||||
444 | 11 | 46 | $ret = $parser->{POD_HTMLEASY} | ||||
445 | ->{ON_E}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
446 | } | ||||||
447 | 57 | 83 | when (q{F}) { | ||||
448 | 1 | 6 | $ret = $parser->{POD_HTMLEASY} | ||||
449 | ->{ON_F}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
450 | } | ||||||
451 | 56 | 448 | when (q{I}) { | ||||
452 | 5 | 32 | $ret = $parser->{POD_HTMLEASY} | ||||
453 | ->{ON_I}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
454 | } | ||||||
455 | 51 | 91 | when (q{L}) { | ||||
456 | |||||||
457 | # L<> causes problems, but not with parselink. | ||||||
458 | 42 | 50 | 122 | if ( $seq_argument eq EMPTY ) { | |||
459 | 0 | 0 | _errors( $parser, q{Empty L<>} ); | ||||
460 | 0 | 0 | return EMPTY; | ||||
461 | } | ||||||
462 | 42 | 136 | my @parsed = Pod::ParseLink::parselink($seq_argument); | ||||
463 | 42 | 1375 | foreach (@parsed) { | ||||
464 | 210 | 100 | 419 | if ( defined $_ ) { _encode_entities( \$_ ); } | |||
147 | 308 | ||||||
465 | } | ||||||
466 | |||||||
467 | # Encoding handled in ON_L() | ||||||
468 | 42 | 240 | $ret = $parser->{POD_HTMLEASY} | ||||
469 | ->{ON_L}( $parser->{POD_HTMLEASY}, @parsed ); | ||||||
470 | } | ||||||
471 | 9 | 14 | when (q{S}) { | ||||
472 | 5 | 26 | $ret = $parser->{POD_HTMLEASY} | ||||
473 | ->{ON_S}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
474 | } | ||||||
475 | 4 | 7 | when (q{Z}) { | ||||
476 | 2 | 12 | $ret = $parser->{POD_HTMLEASY} | ||||
477 | ->{ON_Z}( $parser->{POD_HTMLEASY}, $seq_argument ); | ||||||
478 | } | ||||||
479 | 2 | 6 | default { | ||||
480 | 2 | 50 | 16 | if ( defined $parser->{POD_HTMLEASY}->{qq{ON_\U$seq_command\E}} ) | |||
481 | { | ||||||
482 | 2 | 12 | $ret | ||||
483 | = $parser->{POD_HTMLEASY} | ||||||
484 | ->{qq{ON_\U$seq_command\E}}( $parser->{POD_HTMLEASY}, | ||||||
485 | $seq_argument ); | ||||||
486 | } | ||||||
487 | else { | ||||||
488 | 0 | 0 | $ret = qq{$seq_command<$seq_argument>}; | ||||
489 | } | ||||||
490 | } | ||||||
491 | } | ||||||
492 | 4 | 4 | 25 | use warnings; | |||
4 | 7 | ||||||
4 | 5004 | ||||||
493 | |||||||
494 | # Escape HTML-significant characters | ||||||
495 | 89 | 380 | _nul_escape( \$ret ); | ||||
496 | |||||||
497 | 89 | 7885 | return $ret; | ||||
498 | } | ||||||
499 | |||||||
500 | ######################## | ||||||
501 | # PREPROCESS_PARAGRAPH # | ||||||
502 | ######################## | ||||||
503 | |||||||
504 | Readonly::Scalar my $INFO_DONE => 3; | ||||||
505 | |||||||
506 | # Overrides preprocess_paragraph() provided by base class in Pod::Parser | ||||||
507 | # NB: the text is _not_ altered. | ||||||
508 | sub preprocess_paragraph { | ||||||
509 | 297 | 297 | 0 | 478 | my ( $parser, $text, $line_num ) = @_; | ||
510 | |||||||
511 | 297 | 50 | 989 | if ( $parser->{POD_HTMLEASY}{INFO_COUNT} == $INFO_DONE ) { | |||
512 | 0 | 0 | return $text; | ||||
513 | } | ||||||
514 | |||||||
515 | 297 | 100 | 776 | if ( not exists $parser->{POD_HTMLEASY}{PACKAGE} ) { | |||
516 | 295 | 100 | 1047 | if ( $text =~ m{package}smx ) { | |||
517 | 1 | 7 | my ($pack) = $text =~ m{package\s+(\w+(?:::\w+)*)}smx; | ||||
518 | 1 | 50 | 4 | if ( defined $pack ) { | |||
519 | 1 | 3 | $parser->{POD_HTMLEASY}{PACKAGE} = $pack; | ||||
520 | 1 | 3 | $parser->{POD_HTMLEASY}{INFO_COUNT}++; | ||||
521 | } | ||||||
522 | } | ||||||
523 | } | ||||||
524 | |||||||
525 | 297 | 100 | 718 | if ( not exists $parser->{POD_HTMLEASY}{VERSION} ) { | |||
526 | 295 | 100 | 673 | if ( $text =~ m{VERSION}smx ) { | |||
527 | 1 | 11 | my ($ver) = $text =~ m{($RE{num}{decimal})}smx; | ||||
528 | 1 | 50 | 377 | if ( defined $ver ) { | |||
529 | 1 | 4 | $parser->{POD_HTMLEASY}{VERSION} = $ver; | ||||
530 | 1 | 4 | $parser->{POD_HTMLEASY}{INFO_COUNT}++; | ||||
531 | } | ||||||
532 | } | ||||||
533 | } | ||||||
534 | |||||||
535 | # This situation is created by evt_on_head1() | ||||||
536 | # _do_title has found nothing following =head1 NAME, so it | ||||||
537 | # creates ...{TITLE}, and leaves it undef, so that it will be | ||||||
538 | # picked up here when the paragraph following is processed. | ||||||
539 | 297 | 100 | 100 | 1094 | if ( ( exists $parser->{POD_HTMLEASY}{TITLE} ) | ||
540 | and ( not defined $parser->{POD_HTMLEASY}{TITLE} ) ) | ||||||
541 | { | ||||||
542 | 4 | 14 | my @lines = split m{\n}smx, $text; | ||||
543 | 4 | 9 | my $tmp_text = shift @lines; | ||||
544 | 4 | 100 | 13 | if ( not defined $tmp_text ) { return $text; } | |||
2 | 45 | ||||||
545 | 2 | 13 | $tmp_text =~ s{$RE{ws}{crop}}{}gsmx; # delete surrounding whitespace | ||||
546 | 2 | 256 | $parser->{POD_HTMLEASY}{TITLE} = $tmp_text; | ||||
547 | 2 | 8 | $parser->{POD_HTMLEASY}{INFO_COUNT}++; | ||||
548 | } | ||||||
549 | |||||||
550 | 295 | 16795 | return $text; | ||||
551 | } | ||||||
552 | |||||||
553 | ############## | ||||||
554 | # _ADD_INDEX # | ||||||
555 | ############## | ||||||
556 | |||||||
557 | sub _add_index { | ||||||
558 | 56 | 56 | 134 | my ( $parser, $txt, $level ) = @_; | |||
559 | |||||||
560 | # Don't index star items | ||||||
561 | 56 | 100 | 190 | if ( $txt eq q{*} ) { return; } | |||
4 | 15 | ||||||
562 | |||||||
563 | 52 | 100 | 135 | if ( exists $parser->{INDEX_ITEM} ) { | |||
564 | 23 | 38 | my $max_len = $parser->{INDEX_LENGTH}; | ||||
565 | 23 | 100 | 73 | if ( length $txt > $max_len ) { | |||
566 | 1 | 13 | while ( substr( $txt, $max_len, 1 ) ne q{ } ) { | ||||
567 | 5 | 7 | $max_len++; | ||||
568 | 5 | 50 | 12 | last if $max_len >= length $txt; | |||
569 | } | ||||||
570 | 1 | 50 | 4 | if ( $max_len < length $txt ) { | |||
571 | 1 | 3 | $txt = substr( $txt, 0, $max_len ) . "..."; | ||||
572 | } | ||||||
573 | } | ||||||
574 | } | ||||||
575 | |||||||
576 | 52 | 118 | _remove_nul_escapes( \$txt ); | ||||
577 | 52 | 186 | push @{ $parser->{POD_HTMLEASY}->{INDEX} }, [ $level, $txt ]; | ||||
52 | 348 | ||||||
578 | |||||||
579 | 52 | 103 | return; | ||||
580 | |||||||
581 | } | ||||||
582 | |||||||
583 | ############# | ||||||
584 | # BEGIN_POD # | ||||||
585 | ############# | ||||||
586 | |||||||
587 | # Overrides begin_pod() provided by base class in Pod::Parser | ||||||
588 | sub begin_pod { | ||||||
589 | 48 | 48 | 0 | 97 | my ($parser) = @_; | ||
590 | |||||||
591 | 48 | 274 | delete $parser->{POD_HTMLEASY}->{INDEX}; | ||||
592 | 48 | 135 | $parser->{POD_HTMLEASY}->{INDEX} = []; | ||||
593 | |||||||
594 | 48 | 3163 | return 1; | ||||
595 | } | ||||||
596 | |||||||
597 | ########### | ||||||
598 | # END_POD # | ||||||
599 | ########### | ||||||
600 | |||||||
601 | # Overrides end_pod() provided by base class in Pod::Parser | ||||||
602 | sub end_pod { | ||||||
603 | 48 | 48 | 0 | 95 | my ($parser) = @_; | ||
604 | |||||||
605 | 48 | 100 | 186 | if ( defined $parser->{POD_HTMLEASY}->{VERBATIM_BUFFER} ) { | |||
606 | 26 | 59 | _verbatim($parser); | ||||
607 | } | ||||||
608 | |||||||
609 | 48 | 2193 | return 1; | ||||
610 | } | ||||||
611 | |||||||
612 | ########### | ||||||
613 | # _ERRORS # | ||||||
614 | ########### | ||||||
615 | |||||||
616 | sub _errors { | ||||||
617 | 0 | 0 | my ( $parser, $error ) = @_; | ||||
618 | |||||||
619 | 0 | carp "$error"; | |||||
620 | 0 | $error =~ s{^\s*\**\s*errors?:?\s*}{}ismx; | |||||
621 | 0 | $error =~ s{\s+$}{}smx; | |||||
622 | |||||||
623 | 0 | my $html = $parser->{POD_HTMLEASY} | |||||
624 | ->{ON_ERROR}( $parser->{POD_HTMLEASY}, $error ); | ||||||
625 | 0 | 0 | if ( $html ne EMPTY ) { | ||||
626 | 0 | push @{ $parser->{POD_HTMLEASY}->{HTML} }, $html; | |||||
0 | |||||||
627 | } | ||||||
628 | |||||||
629 | 0 | return 1; | |||||
630 | } | ||||||
631 | |||||||
632 | ########### | ||||||
633 | # DESTROY # | ||||||
634 | ########### | ||||||
635 | |||||||
636 | 0 | 0 | sub DESTROY { } | ||||
637 | |||||||
638 | ####### | ||||||
639 | # END # | ||||||
640 | ####### | ||||||
641 | |||||||
642 | 1; | ||||||
643 |