File Coverage

blib/lib/Pod/Simple/HTML.pm

Criterion	Covered	Total	%
statement	288	359	80.2
branch	121	178	67.9
condition	66	121	54.5
subroutine	36	39	92.3
pod	1	31	3.2
total	512	728	70.3

line	stmt	bran	cond	sub	pod	time	code
1							package Pod::Simple::HTML;
2	7			7		211870	use strict;
	7					36
	7					200
3	7			7		39	use warnings;
	7					13
	7					161
4	7			7		3845	use Pod::Simple::PullParser ();
	7					25
	7					619
5							our @ISA = ('Pod::Simple::PullParser');
6							our $VERSION = '3.45';
7							BEGIN {
8	7	50		7		58	if(defined &DEBUG) { } # no-op
		50
9	7					42516	elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
10	0					0	else { *DEBUG = sub () {0}; }
11							}
12
13							our $Doctype_decl \|\|= ''; # No. Just No. Don't even ask me for it.
14							# qq{
15							# "http://www.w3.org/TR/html4/loose.dtd">\n};
16
17							our $Content_decl \|\|=
18							q{};
19
20							our $HTML_EXTENSION;
21							$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
22							our $Computerese;
23							$Computerese = "" unless defined $Computerese;
24							our $LamePad;
25							$LamePad = '' unless defined $LamePad;
26
27							our $Linearization_Limit;
28							$Linearization_Limit = 120 unless defined $Linearization_Limit;
29							# headings/items longer than that won't get an
30							our $Perldoc_URL_Prefix;
31							$Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
32							unless defined $Perldoc_URL_Prefix;
33							our $Perldoc_URL_Postfix;
34							$Perldoc_URL_Postfix = ''
35							unless defined $Perldoc_URL_Postfix;
36
37
38							our $Man_URL_Prefix = 'http://man.he.net/man';
39							our $Man_URL_Postfix = '';
40
41							our $Title_Prefix;
42							$Title_Prefix = '' unless defined $Title_Prefix;
43							our $Title_Postfix;
44							$Title_Postfix = '' unless defined $Title_Postfix;
45							our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46							# 'item-text' stuff in the index doesn't quite work, and may
47							# not be a good idea anyhow.
48
49
50							__PACKAGE__->_accessorize(
51							'perldoc_url_prefix',
52							# In turning L into http://whatever/Foo%3a%3aBar, what
53							# to put before the "Foo%3a%3aBar".
54							# (for singleton mode only?)
55							'perldoc_url_postfix',
56							# what to put after "Foo%3a%3aBar" in the URL. Normally "".
57
58							'man_url_prefix',
59							# In turning L into http://whatever/man/1/crontab, what
60							# to put before the "1/crontab".
61							'man_url_postfix',
62							# what to put after the "1/crontab" in the URL. Normally "".
63
64							'batch_mode', # whether we're in batch mode
65							'batch_mode_current_level',
66							# When in batch mode, how deep the current module is: 1 for "LWP",
67							# 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68
69							'title_prefix', 'title_postfix',
70							# What to put before and after the title in the head.
71							# Should already be &-escaped
72
73							'html_h_level',
74
75							'html_header_before_title',
76							'html_header_after_title',
77							'html_footer',
78							'top_anchor',
79
80							'index', # whether to add an index at the top of each page
81							# (actually it's a table-of-contents, but we'll call it an index,
82							# out of apparently longstanding habit)
83
84							'html_css', # URL of CSS file to point to
85							'html_javascript', # URL of Javascript file to point to
86
87							'force_title', # should already be &-escaped
88							'default_title', # should already be &-escaped
89							);
90
91							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92							my @_to_accept;
93
94							our %Tagmap = (
95							'Verbatim' => "\n",
96							'/Verbatim' => "\n",
97							'VerbatimFormatted' => "\n",
98							'/VerbatimFormatted' => "\n",
99							'VerbatimB' => "",
100							'/VerbatimB' => "",
101							'VerbatimI' => "",
102							'/VerbatimI' => "",
103							'VerbatimBI' => "",
104							'/VerbatimBI' => "",
105
106
107							'Data' => "\n",
108							'/Data' => "\n",
109
110							'head1' => "\n ", # And also stick in an
111							'head2' => "\n ", # ''
112							'head3' => "\n ", # ''
113							'head4' => "\n ", # ''
114							'head5' => "\n ", # ''
115							'head6' => "\n ", # ''
116							'/head1' => "\n",
117							'/head2' => "\n",
118							'/head3' => "\n",
119							'/head4' => "\n",
120							'/head5' => "\n",
121							'/head6' => "\n",
122
123							'X' => "",
125
126							changes(qw(
127							Para=p
128							B=b I=i
129							over-bullet=ul
130							over-number=ol
131							over-text=dl
132							over-block=blockquote
133							item-bullet=li
134							item-number=li
135							item-text=dt
136							)),
137							changes2(
138							map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
139							qw[
140							sample=samp
141							definition=dfn
142							keyboard=kbd
143							variable=var
144							citation=cite
145							abbreviation=abbr
146							acronym=acronym
147							subscript=sub
148							superscript=sup
149							big=big
150							small=small
151							underline=u
152							strikethrough=s
153							preformat=pre
154							teletype=tt
155							] # no point in providing a way to get ..., I think
156							),
157
158							'/item-bullet' => "$LamePad\n",
159							'/item-number' => "$LamePad\n",
160							'/item-text' => "$LamePad\n",
161							'item-body' => "\n ",
162							'/item-body' => "\n",
163
164
165							'B' => "", '/B' => "",
166							'I' => "", '/I' => "",
167							'F' => "", '/F' => "",
168							'C' => "", '/C' => "",
169							'L' => "", # ideally never used!
170							'/L' => "",
171							);
172
173							sub changes {
174	7	50		7	0	15	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	70					443
175							? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
176							} @_;
177							}
178							sub changes2 {
179	7	50		7	0	16	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	105					968
180							? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
181							} @_;
182							}
183
184							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
185	0			0	0	0	sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
	0					0
186							# Just so we can run from the command line. No options.
187							# For that, use perldoc!
188							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189
190							sub new {
191	59			59	1	846	my $new = shift->SUPER::new(@_);
192							#$new->nix_X_codes(1);
193	59					239	$new->nbsp_for_S(1);
194	59					209	$new->accept_targets( 'html', 'HTML' );
195	59					194	$new->accept_codes('VerbatimFormatted');
196	59					195	$new->accept_codes(@_to_accept);
197	59					86	DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
198
199	59					191	$new->perldoc_url_prefix( $Perldoc_URL_Prefix );
200	59					185	$new->perldoc_url_postfix( $Perldoc_URL_Postfix );
201	59					164	$new->man_url_prefix( $Man_URL_Prefix );
202	59					169	$new->man_url_postfix( $Man_URL_Postfix );
203	59					183	$new->title_prefix( $Title_Prefix );
204	59					162	$new->title_postfix( $Title_Postfix );
205
206	59					251	$new->html_header_before_title(
207							qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="208">208</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="209">209</a> </td> <td class="c3" > 59 </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > 187 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="210">210</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> "",
211							$Content_decl,
212							"\n",
213							$new->version_tag_comment,
214							"\n",
215							);
216	59					223	$new->html_footer( qq[\n\n\n\n] );
217	59					158	$new->top_anchor( "\n" );
218
219	59					2342	$new->{'Tagmap'} = {%Tagmap};
220
221	59					356	return $new;
222							}
223
224							sub __adjust_html_h_levels {
225	58			58		115	my ($self) = @_;
226	58					87	my $Tagmap = $self->{'Tagmap'};
227
228	58					181	my $add = $self->html_h_level;
229	58	100				152	return unless defined $add;
230	1	50	50			14	return if ($self->{'Adjusted_html_h_levels'}\|\|0) == $add;
231
232	1					2	$add -= 1;
233	1					7	for (1 .. 6) {
234	6					70	$Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
	6					19
235	6					41	$Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
	6					19
236							}
237							}
238
239							sub batch_mode_page_object_init {
240	10			10	0	23	my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
241	10					14	DEBUG and print STDERR "Initting $self\n for $module\n",
242							" in $infile\n out $outfile\n depth $depth\n";
243	10					26	$self->batch_mode(1);
244	10					27	$self->batch_mode_current_level($depth);
245	10					19	return $self;
246							}
247
248							sub run {
249	59			59	0	93	my $self = $_[0];
250	59	100				170	return $self->do_middle if $self->bare_output;
251							return
252	18		100			48	$self->do_beginning && $self->do_middle && $self->do_end;
253							}
254
255							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
256
257							sub do_beginning {
258	18			18	0	28	my $self = $_[0];
259
260	18					21	my $title;
261
262	18	50				42	if(defined $self->force_title) {
263	0					0	$title = $self->force_title;
264	0					0	DEBUG and print STDERR "Forcing title to be $title\n";
265							} else {
266							# Actually try looking for the title in the document:
267	18					64	$title = $self->get_short_title();
268	18	100				75	unless($self->content_seen) {
269	1					5	DEBUG and print STDERR "No content seen in search for title.\n";
270	1					5	return;
271							}
272	17					39	$self->{'Title'} = $title;
273
274	17	100	66			83	if(defined $title and $title =~ m/\S/) {
275	14					40	$title = $self->title_prefix . esc($title) . $self->title_postfix;
276							} else {
277	3					11	$title = $self->default_title;
278	3	50				11	$title = '' unless defined $title;
279	3					5	DEBUG and print STDERR "Title defaults to $title\n";
280							}
281							}
282
283
284	17		50			52	my $after = $self->html_header_after_title \|\| '';
285	17	100				49	if($self->html_css) {
286	10	50				20	my $link =
287							$self->html_css =~ m/
288							? $self->html_css # It's a big blob of markup, let's drop it in
289							: sprintf( # It's just a URL, so let's wrap it up
290							qq[\n],
291							$self->html_css,
292							);
293	10					120	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
294							}
295	17					71	$self->_add_top_anchor(\$after);
296
297	17	100				53	if($self->html_javascript) {
298	10	50				27	my $link =
299							$self->html_javascript =~ m/
300							? $self->html_javascript # It's a big blob of markup, let's drop it in
301							: sprintf( # It's just a URL, so let's wrap it up
302							qq[\n],
303							$self->html_javascript,
304							);
305	10					110	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
306							}
307
308	17		50			31	print {$self->{'output_fh'}}
	17					56
309							$self->html_header_before_title \|\| '',
310							$title, # already escaped
311							$after,
312							;
313
314	17					33	DEBUG and print STDERR "Returning from do_beginning...\n";
315	17					92	return 1;
316							}
317
318							sub _add_top_anchor {
319	17			17		33	my($self, $text_r) = @_;
320	17	100	66			150	unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
321	7		50			49	$$text_r .= $self->top_anchor \|\| '';
322							}
323	17					73	return;
324							}
325
326							sub version_tag_comment {
327	59			59	0	90	my $self = shift;
328							return sprintf
329							"\n",
330							esc(
331							ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
332	59		33			1494	$], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} \|\| time)),
333							), $self->_modnote(),
334							;
335							}
336
337							sub _modnote {
338	59		33	59		184	my $class = ref($_[0]) \|\| $_[0];
339	59					1450	return join "\n " => grep m/\S/, split "\n",
340
341							qq{
342							If you want to change this HTML document, you probably shouldn't do that
343							by changing it directly. Instead, see about changing the calling options
344							to $class, and/or subclassing $class,
345							then reconverting this document from the Pod source.
346							When in doubt, email the author of $class for advice.
347							See 'perldoc $class' for more info.
348							};
349
350							}
351
352							sub do_end {
353	17			17	0	34	my $self = $_[0];
354	17		50			36	print {$self->{'output_fh'}} $self->html_footer \|\| '';
	17					86
355	17					91	return 1;
356							}
357
358							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
359							# Normally this would just be a call to _do_middle_main_loop -- but we
360							# have to do some elaborate things to emit all the content and then
361							# summarize it and output it /before/ the content that it's a summary of.
362
363							sub do_middle {
364	58			58	0	85	my $self = $_[0];
365	58	100				163	return $self->_do_middle_main_loop unless $self->index;
366
367	10	50				34	if( $self->output_string ) {
368							# An efficiency hack
369	0					0	my $out = $self->output_string; #it's a reference to it
370	0					0	my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
371	0					0	$$out .= $sneakytag;
372	0					0	$self->_do_middle_main_loop;
373	0					0	$sneakytag = quotemeta($sneakytag);
374	0					0	my $index = $self->index_as_html();
375	0	0				0	if( $$out =~ s/$sneakytag/$index/s ) {
376							# Expected case
377	0					0	DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
378							} else {
379	0					0	DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
380							# I don't think this should ever happen.
381							}
382	0					0	return 1;
383							}
384
385	10	50				47	unless( $self->output_fh ) {
386	0					0	require Carp;
387	0					0	Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
388							}
389
390							# If we get here, we're outputting to a FH. So we need to do some magic.
391							# Namely, divert all content to a string, which we output after the index.
392	10					21	my $fh = $self->output_fh;
393	10					26	my $content = '';
394							{
395							# Our horrible bait and switch:
396	10					13	$self->output_string( \$content );
	10					27
397	10					33	$self->_do_middle_main_loop;
398	10					37	$self->abandon_output_string();
399	10					21	$self->output_fh($fh);
400							}
401	10					28	print $fh $self->index_as_html();
402	10					41	print $fh $content;
403
404	10					51	return 1;
405							}
406
407							###########################################################################
408
409							sub index_as_html {
410	10			10	0	14	my $self = $_[0];
411							# This is meant to be called AFTER the input document has been parsed!
412
413	10		50			34	my $points = $self->{'PSHTML_index_points'} \|\| [];
414
415	10	100				39	@$points > 1 or return qq[ \n];
416							# There's no point in having a 0-item or 1-item index, I dare say.
417
418	8					21	my(@out) = qq{\n };
419	8					11	my $level = 0;
420
421	8					13	my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
422	8					25	foreach my $p (@$points, ['head0', '(end)']) {
423	26					58	($tagname, $text) = @$p;
424	26					54	$anchorname = $self->section_escape($text);
425	26	50				103	if( $tagname =~ m{^head(\d+)$} ) {
426	26					62	$target_level = 0 + $1;
427							} else { # must be some kinda list item
428	0	0				0	if($previous_tagname =~ m{^head\d+$} ) {
429	0					0	$target_level = $level + 1;
430							} else {
431	0					0	$target_level = $level; # no change needed
432							}
433							}
434
435							# Get to target_level by opening or closing ULs
436	26					56	while($level > $target_level)
437	9					18	{ --$level; push @out, (" " x $level) . ""; }
	9					35
438	26					52	while($level < $target_level)
439	9					14	{ ++$level; push @out, (" " x ($level-1))
	9					109
440							. " "; }
441
442	26					34	$previous_tagname = $tagname;
443	26	100				53	next unless $level;
444
445	18					35	$indent = ' ' x $level;
446	18					36	push @out, sprintf
447							"%s %s",
448							$indent, $level, esc($anchorname), esc($text)
449							;
450							}
451	8					16	push @out, "\n";
452	8					46	return join "\n", @out;
453							}
454
455							###########################################################################
456
457							sub _do_middle_main_loop {
458	58			58		90	my $self = $_[0];
459	58					89	my $fh = $self->{'output_fh'};
460	58					89	my $tagmap = $self->{'Tagmap'};
461
462	58					150	$self->__adjust_html_h_levels;
463
464	58					135	my($token, $type, $tagname, $linkto, $linktype);
465	58					0	my @stack;
466	58					79	my $dont_wrap = 0;
467
468	58					150	while($token = $self->get_token) {
469
470							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
471	515	100				1224	if( ($type = $token->type) eq 'start' ) {
		100
		50
472	200	100	100			422	if(($tagname = $token->tagname) eq 'L') {
		100
		100
473	22		50			70	$linktype = $token->attr('type') \|\| 'insane';
474
475	22					56	$linkto = $self->do_link($token);
476
477	22	50	33			87	if(defined $linkto and length $linkto) {
478	22					56	esc($linkto);
479							# (Yes, SGML-escaping applies on top of %-escaping!
480							# But it's rarely noticeable in practice.)
481	22					106	print $fh qq{};
482							} else {
483	0					0	print $fh ""; # Yes, an 'a' element with no attributes!
484							}
485
486							} elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
487	37		50			173	print $fh $tagmap->{$tagname} \|\| next;
488
489	37					78	my @to_unget;
490	37					54	while(1) {
491	85					198	push @to_unget, $self->get_token;
492	85	100	100			222	last if $to_unget[-1]->is_end
493							and $to_unget[-1]->tagname eq $tagname;
494
495							# TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
496							}
497
498	37					122	my $name = $self->linearize_tokens(@to_unget);
499	37	50				131	$name = $self->do_section($name, $token) if defined $name;
500
501	37					118	print $fh "
502	37	100				190	if ($tagname =~ m/^head\d$/s) {
503	33	100				106	print $fh "class='u'", $self->index
504							? " href='#___top' title='click to go to top of document'\n"
505							: "\n";
506							}
507
508	37	50				85	if(defined $name) {
509	37					98	my $esc = esc( $self->section_name_tidy( $name ) );
510	37					147	print $fh qq[name="$esc"];
511	37					53	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
512							" tokens as \"$name\".\n";
513	31					115	push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
514	37	100				101	if $ToIndex{ $tagname };
515							# Obviously, this discards all formatting codes (saving
516							# just their content), but ahwell.
517
518							} else { # ludicrously long, so nevermind
519	0					0	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
520							" tokens, but it was too long, so nevermind.\n";
521							}
522	37					98	print $fh "\n>";
523	37					106	$self->unget_token(@to_unget);
524
525							} elsif ($tagname eq 'Data') {
526	4					10	my $next = $self->get_token;
527	4	50				10	next unless defined $next;
528	4	50				11	unless( $next->type eq 'text' ) {
529	0					0	$self->unget_token($next);
530	0					0	next;
531							}
532	4					7	DEBUG and print STDERR " raw text ", $next->text, "\n";
533							# The parser sometimes preserves newlines and sometimes doesn't!
534	4					11	(my $text = $next->text) =~ s/\n\z//;
535	4					17	print $fh $text, "\n";
536	4					13	next;
537
538							} else {
539	137	100	33			503	if( $tagname =~ m/^over-/s ) {
		50	33
540	3					15	push @stack, '';
541							} elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
542	0					0	print $fh $stack[-1];
543	0					0	$stack[-1] = '';
544							}
545	137		100			574	print $fh $tagmap->{$tagname} \|\| next;
546	78	100	66			452	++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
			100
547							or $tagname eq 'X';
548							}
549
550							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
551							} elsif( $type eq 'end' ) {
552	200	100	66			424	if( ($tagname = $token->tagname) =~ m/^over-/s ) {
		100
553	3	50				81	if( my $end = pop @stack ) {
554	3					13	print $fh $end;
555							}
556							} elsif( $tagname =~ m/^item-/s and @stack) {
557	4					37	$stack[-1] = $tagmap->{"/$tagname"};
558	4	50	33			21	if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
559	4					18	$self->unget_token($next);
560	4	100				14	if( $next->type eq 'start' ) {
561	3					16	print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
562	3					10	$stack[-1] = $tagmap->{"/item-body"};
563							}
564							}
565	4					10	next;
566							}
567	196		100			724	print $fh $tagmap->{"/$tagname"} \|\| next;
568	137	100	66			600	--$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
569
570							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
571							} elsif( $type eq 'text' ) {
572	115					283	esc($type = $token->text); # reuse $type, why not
573	115	100				366	$type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
574	115					289	print $fh $type;
575							}
576
577							}
578	58					242	return 1;
579							}
580
581							###########################################################################
582							#
583
584							sub do_section {
585	36			36	0	93	my($self, $name, $token) = @_;
586	36					64	return $name;
587							}
588
589							sub do_link {
590	22			22	0	42	my($self, $token) = @_;
591	22					43	my $type = $token->attr('type');
592	22	50				71	if(!defined $type) {
		100
		100
		50
593	0					0	$self->whine("Typeless L!?", $token->attr('start_line'));
594	10					25	} elsif( $type eq 'pod') { return $self->do_pod_link($token);
595	9					23	} elsif( $type eq 'url') { return $self->do_url_link($token);
596	3					12	} elsif( $type eq 'man') { return $self->do_man_link($token);
597							} else {
598	0					0	$self->whine("L of unknown type $type!?", $token->attr('start_line'));
599							}
600	0					0	return 'FNORG'; # should never get called
601							}
602
603							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
604
605	9			9	0	18	sub do_url_link { return $_[1]->attr('to') }
606
607							sub do_man_link {
608	3			3	0	5	my ($self, $link) = @_;
609	3					7	my $to = $link->attr('to');
610	3					8	my $frag = $link->attr('section');
611
612	3	50	33			15	return undef unless defined $to and length $to; # should never happen
613
614	3	100	66			36	$frag = $self->section_escape($frag)
615							if defined $frag and length($frag .= ''); # (stringify)
616
617	3					5	DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
618
619	3					12	return $self->resolve_man_page_link($to, $frag);
620							}
621
622
623							sub do_pod_link {
624							# And now things get really messy...
625	10			10	0	16	my($self, $link) = @_;
626	10					20	my $to = $link->attr('to');
627	10					20	my $section = $link->attr('section');
628							return undef unless( # should never happen
629	10	50	66			45	(defined $to and length $to) or
			33
			66
630							(defined $section and length $section)
631							);
632
633	10	100	66			35	$section = $self->section_escape($section)
634							if defined $section and length($section .= ''); # (stringify)
635
636	10					17	DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
637							$to \|\| "(nil)", $section \|\| "(nil)";
638
639							{
640							# An early hack:
641	10					12	my $complete_url = $self->resolve_pod_link_by_table($to, $section);
	10					23
642	10	50				33	if( $complete_url ) {
643	0					0	DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
644							$complete_url, "\n (Returning that.)\n";
645	0					0	return $complete_url;
646							} else {
647	10					19	DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
648							" didn't return anything interesting.\n";
649							}
650							}
651
652	10	100	66			32	if(defined $to and length $to) {
653							# Give this routine first hack again
654	7					17	my $there = $self->resolve_pod_link_by_table($to);
655	7	50	33			23	if(defined $there and length $there) {
656	0					0	DEBUG > 1
657							and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
658							} else {
659	7					22	$there =
660							$self->resolve_pod_page_link($to, $section);
661							# (I pass it the section value, but I don't see a
662							# particular reason it'd use it.)
663	7					11	DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there \|\| "(nil)", "\n";
664	7	50	33			24	unless( defined $there and length $there ) {
665	0					0	DEBUG and print STDERR "Can't resolve $to\n";
666	0					0	return undef;
667							}
668							# resolve_pod_page_link returning undef is how it
669							# can signal that it gives up on making a link
670							}
671	7					16	$to = $there;
672							}
673
674							#DEBUG and print STDERR "So far [", $to\|\|'nil', "] [", $section\|\|'nil', "]\n";
675
676	10	100	66			33	my $out = (defined $to and length $to) ? $to : '';
677	10	100	66			36	$out .= "#" . $section if defined $section and length $section;
678
679	10	50				21	unless(length $out) { # sanity check
680	0					0	DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
681							$to \|\| "(nil)", $section \|\| "(nil)";
682	0					0	return undef;
683							}
684
685	10					11	DEBUG and print STDERR "Resolved to $out\n";
686	10					26	return $out;
687							}
688
689
690							# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
691
692							sub section_escape {
693	32			32	0	59	my($self, $section) = @_;
694	32					64	return $self->section_url_escape(
695							$self->section_name_tidy($section)
696							);
697							}
698
699							sub section_name_tidy {
700	69			69	0	128	my($self, $section) = @_;
701	69					175	$section =~ s/^\s+//;
702	69					162	$section =~ s/\s+$//;
703	69					122	$section =~ tr/ /_/;
704	69	50				289	if ($] ge 5.006) {
705	69					143	$section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
706							} elsif ('A' eq chr(65)) { # But not on early EBCDIC
707	0					0	$section =~ tr/\x00-\x1F\x80-\x9F//d;
708							}
709	69					142	$section = $self->unicode_escape_url($section);
710	69	50				145	$section = '_' unless length $section;
711	69					166	return $section;
712							}
713
714	32			32	0	78	sub section_url_escape { shift->general_url_escape(@_) }
715	7			7	0	15	sub pagepath_url_escape { shift->general_url_escape(@_) }
716	3			3	0	8	sub manpage_url_escape { shift->general_url_escape(@_) }
717
718							sub general_url_escape {
719	42			42	0	79	my($self, $string) = @_;
720
721	42					73	$string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
	0					0
722							# express Unicode things as urlencode(utf(orig)).
723
724							# A pretty conservative escaping, behoovey even for query components
725							# of a URL (see RFC 2396)
726
727	42	50				136	if ($] ge 5.007_003) {
728	42					80	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
	8					43
729							} else { # Is broken for non-ASCII platforms on early perls
730	0					0	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
	0					0
731							}
732							# Yes, stipulate the list without a range, so that this can work right on
733							# all charsets that this module happens to run under.
734
735	42					95	return $string;
736							}
737
738							#--------------------------------------------------------------------------
739							#
740							# Oh look, a yawning portal to Hell! Let's play touch football right by it!
741							#
742
743							sub resolve_pod_page_link {
744							# resolve_pod_page_link must return a properly escaped URL
745	7			7	0	11	my $self = shift;
746	7	50				19	return $self->batch_mode()
747							? $self->resolve_pod_page_link_batch_mode(@_)
748							: $self->resolve_pod_page_link_singleton_mode(@_)
749							;
750							}
751
752							sub resolve_pod_page_link_singleton_mode {
753	7			7	0	23	my($self, $it) = @_;
754	7	50	33			25	return undef unless defined $it and length $it;
755	7					37	my $url = $self->pagepath_url_escape($it);
756
757	7					15	$url =~ s{::$}{}s; # probably never comes up anyway
758	7	50				22	$url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
759
760	7	50				17	return undef unless length $url;
761	7					18	return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
762							}
763
764							sub resolve_pod_page_link_batch_mode {
765	0			0	0	0	my($self, $to) = @_;
766	0					0	DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
767	0					0	my @path = grep length($_), split m/::/s, $to, -1;
768	0	0				0	unless( @path ) { # sanity
769	0					0	DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
770	0					0	return undef;
771							}
772	0					0	$self->batch_mode_rectify_path(\@path);
773	0					0	my $out = join('/', map $self->pagepath_url_escape($_), @path)
774							. $HTML_EXTENSION;
775	0					0	DEBUG > 1 and print STDERR " => $out\n";
776	0					0	return $out;
777							}
778
779							sub batch_mode_rectify_path {
780	0			0	0	0	my($self, $pathbits) = @_;
781	0					0	my $level = $self->batch_mode_current_level;
782	0					0	$level--; # how many levels up to go to get to the root
783	0	0				0	if($level < 1) {
784	0					0	unshift @$pathbits, '.'; # just to be pretty
785							} else {
786	0					0	unshift @$pathbits, ('..') x $level;
787							}
788	0					0	return;
789							}
790
791							sub resolve_man_page_link {
792	3			3	0	9	my ($self, $to, $frag) = @_;
793	3					7	my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
794
795	3	50	33			17	return undef unless defined $page and length $page;
796	3		50			21	$section \|\|= 1;
797
798	3					11	return $self->man_url_prefix . "$section/"
799							. $self->manpage_url_escape($page)
800							. $self->man_url_postfix;
801							}
802
803							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
804
805							sub resolve_pod_link_by_table {
806							# A crazy hack to allow specifying custom L => URL mappings
807
808	17	50		17	0	60	return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
809
810	0					0	my($self, $to, $section) = @_;
811
812							# TODO: add a method that actually populates podhtml_LOT from a file?
813
814	0	0				0	if(defined $section) {
815	0	0	0			0	$to = '' unless defined $to and length $to;
816	0					0	return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
817							} else {
818	0					0	return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
819							}
820	0					0	return;
821							}
822
823							###########################################################################
824
825							sub linearize_tokens { # self, tokens
826	37			37	0	58	my $self = shift;
827	37					139	my $out = '';
828
829	37					58	my $t;
830	37					156	while($t = shift @_) {
831	79	50	33			419	if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
		100	66
		100
832	0					0	$out .= $t; # a string, or some insane thing
833							} elsif($t->is_text) {
834	39					102	$out .= $t->text;
835							} elsif($t->is_start and $t->tag eq 'X') {
836							# Ignore until the end of this X<...> sequence:
837	3					7	my $x_open = 1;
838	3					6	while($x_open) {
839	6	100				17	next if( ($t = shift @_)->is_text );
840	3	50	33			9	if( $t->is_start and $t->tag eq 'X') { ++$x_open }
	0	50	33			0
841	3					11	elsif($t->is_end and $t->tag eq 'X') { --$x_open }
842							}
843							}
844							}
845	37	50				95	return undef if length $out > $Linearization_Limit;
846	37					90	return $out;
847							}
848
849							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
850
851							sub unicode_escape_url {
852	69			69	0	136	my($self, $string) = @_;
853	69					120	$string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
	0					0
854							# Turn char 1234 into "(1234)"
855	69					134	return $string;
856							}
857
858							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
859							sub esc { # a function.
860	462	100		462	0	1013	if(defined wantarray) {
861	325	100				512	if(wantarray) {
862	274					702	@_ = splice @_; # break aliasing
863							} else {
864	51					74	my $x = shift;
865	51	50				192	if ($] ge 5.007_003) {
866	51					99	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
	0					0
867							} else { # Is broken for non-ASCII platforms on early perls
868	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
	0					0
869							}
870	51					178	return $x;
871							}
872							}
873	411					683	foreach my $x (@_) {
874							# Escape things very cautiously:
875	1094	50				1784	if (defined $x) {
876	1094	50				2853	if ($] ge 5.007_003) {
877	1094					2138	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
	52					190
878							} else { # Is broken for non-ASCII platforms on early perls
879	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
	0					0
880							}
881							}
882							# Leave out "- so that "--" won't make it thru in X-generated comments
883							# with text in them.
884
885							# Yes, stipulate the list without a range, so that this can work right on
886							# all charsets that this module happens to run under.
887							}
888	411					1287	return @_;
889							}
890
891							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
892
893							1;
894							__END__

File Coverage

", # And also stick in an

", # ''

", # ''

", # ''

", # ''

", # ''