File Coverage

lib/Pod/Simple/HTML.pm

Criterion	Covered	Total	%
statement	288	359	80.2
branch	121	178	67.9
condition	66	121	54.5
subroutine	36	39	92.3
pod	1	31	3.2
total	512	728	70.3

line	stmt	bran	cond	sub	pod	time	code
1							require 5;
2							package Pod::Simple::HTML;
3	7			7		17754	use strict;
	7					27
	7					183
4	7			7		3228	use Pod::Simple::PullParser ();
	7					15
	7					232
5	7					1115	use vars qw(
6							@ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
7							$Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix
8							$Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
9							$Doctype_decl $Content_decl
10	7			7		37	);
	7					11
11							@ISA = ('Pod::Simple::PullParser');
12							$VERSION = '3.43';
13							BEGIN {
14	7	50		7		51	if(defined &DEBUG) { } # no-op
		50
15	7					34041	elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
16	0					0	else { *DEBUG = sub () {0}; }
17							}
18
19							$Doctype_decl \|\|= ''; # No. Just No. Don't even ask me for it.
20							# qq{
21							# "http://www.w3.org/TR/html4/loose.dtd">\n};
22
23							$Content_decl \|\|=
24							q{};
25
26							$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
27							$Computerese = "" unless defined $Computerese;
28							$LamePad = '' unless defined $LamePad;
29
30							$Linearization_Limit = 120 unless defined $Linearization_Limit;
31							# headings/items longer than that won't get an
32							$Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
33							unless defined $Perldoc_URL_Prefix;
34							$Perldoc_URL_Postfix = ''
35							unless defined $Perldoc_URL_Postfix;
36
37
38							$Man_URL_Prefix = 'http://man.he.net/man';
39							$Man_URL_Postfix = '';
40
41							$Title_Prefix = '' unless defined $Title_Prefix;
42							$Title_Postfix = '' unless defined $Title_Postfix;
43							%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
44							# 'item-text' stuff in the index doesn't quite work, and may
45							# not be a good idea anyhow.
46
47
48							__PACKAGE__->_accessorize(
49							'perldoc_url_prefix',
50							# In turning L into http://whatever/Foo%3a%3aBar, what
51							# to put before the "Foo%3a%3aBar".
52							# (for singleton mode only?)
53							'perldoc_url_postfix',
54							# what to put after "Foo%3a%3aBar" in the URL. Normally "".
55
56							'man_url_prefix',
57							# In turning L into http://whatever/man/1/crontab, what
58							# to put before the "1/crontab".
59							'man_url_postfix',
60							# what to put after the "1/crontab" in the URL. Normally "".
61
62							'batch_mode', # whether we're in batch mode
63							'batch_mode_current_level',
64							# When in batch mode, how deep the current module is: 1 for "LWP",
65							# 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
66
67							'title_prefix', 'title_postfix',
68							# What to put before and after the title in the head.
69							# Should already be &-escaped
70
71							'html_h_level',
72
73							'html_header_before_title',
74							'html_header_after_title',
75							'html_footer',
76							'top_anchor',
77
78							'index', # whether to add an index at the top of each page
79							# (actually it's a table-of-contents, but we'll call it an index,
80							# out of apparently longstanding habit)
81
82							'html_css', # URL of CSS file to point to
83							'html_javascript', # URL of Javascript file to point to
84
85							'force_title', # should already be &-escaped
86							'default_title', # should already be &-escaped
87							);
88
89							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90							my @_to_accept;
91
92							%Tagmap = (
93							'Verbatim' => "\n",
94							'/Verbatim' => "\n",
95							'VerbatimFormatted' => "\n",
96							'/VerbatimFormatted' => "\n",
97							'VerbatimB' => "",
98							'/VerbatimB' => "",
99							'VerbatimI' => "",
100							'/VerbatimI' => "",
101							'VerbatimBI' => "",
102							'/VerbatimBI' => "",
103
104
105							'Data' => "\n",
106							'/Data' => "\n",
107
108							'head1' => "\n ", # And also stick in an
109							'head2' => "\n ", # ''
110							'head3' => "\n ", # ''
111							'head4' => "\n ", # ''
112							'head5' => "\n ", # ''
113							'head6' => "\n ", # ''
114							'/head1' => "\n",
115							'/head2' => "\n",
116							'/head3' => "\n",
117							'/head4' => "\n",
118							'/head5' => "\n",
119							'/head6' => "\n",
120
121							'X' => "",
123
124							changes(qw(
125							Para=p
126							B=b I=i
127							over-bullet=ul
128							over-number=ol
129							over-text=dl
130							over-block=blockquote
131							item-bullet=li
132							item-number=li
133							item-text=dt
134							)),
135							changes2(
136							map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
137							qw[
138							sample=samp
139							definition=dfn
140							keyboard=kbd
141							variable=var
142							citation=cite
143							abbreviation=abbr
144							acronym=acronym
145							subscript=sub
146							superscript=sup
147							big=big
148							small=small
149							underline=u
150							strikethrough=s
151							preformat=pre
152							teletype=tt
153							] # no point in providing a way to get ..., I think
154							),
155
156							'/item-bullet' => "$LamePad\n",
157							'/item-number' => "$LamePad\n",
158							'/item-text' => "$LamePad\n",
159							'item-body' => "\n ",
160							'/item-body' => "\n",
161
162
163							'B' => "", '/B' => "",
164							'I' => "", '/I' => "",
165							'F' => "", '/F' => "",
166							'C' => "", '/C' => "",
167							'L' => "", # ideally never used!
168							'/L' => "",
169							);
170
171							sub changes {
172	7	50		7	0	15	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	70					357
173							? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
174							} @_;
175							}
176							sub changes2 {
177	7	50		7	0	14	return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
	105					816
178							? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
179							} @_;
180							}
181
182							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183	0			0	0	0	sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
	0					0
184							# Just so we can run from the command line. No options.
185							# For that, use perldoc!
186							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
187
188							sub new {
189	59			59	1	536	my $new = shift->SUPER::new(@_);
190							#$new->nix_X_codes(1);
191	59					207	$new->nbsp_for_S(1);
192	59					165	$new->accept_targets( 'html', 'HTML' );
193	59					153	$new->accept_codes('VerbatimFormatted');
194	59					162	$new->accept_codes(@_to_accept);
195	59					63	DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
196
197	59					177	$new->perldoc_url_prefix( $Perldoc_URL_Prefix );
198	59					144	$new->perldoc_url_postfix( $Perldoc_URL_Postfix );
199	59					133	$new->man_url_prefix( $Man_URL_Prefix );
200	59					148	$new->man_url_postfix( $Man_URL_Postfix );
201	59					148	$new->title_prefix( $Title_Prefix );
202	59					127	$new->title_postfix( $Title_Postfix );
203
204	59					197	$new->html_header_before_title(
205							qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="206">206</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="207">207</a> </td> <td class="c3" > 59 </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > 148 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="208">208</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> "",
209							$Content_decl,
210							"\n",
211							$new->version_tag_comment,
212							"\n",
213							);
214	59					190	$new->html_footer( qq[\n\n\n\n] );
215	59					145	$new->top_anchor( "\n" );
216
217	59					2350	$new->{'Tagmap'} = {%Tagmap};
218
219	59					374	return $new;
220							}
221
222							sub __adjust_html_h_levels {
223	58			58		101	my ($self) = @_;
224	58					85	my $Tagmap = $self->{'Tagmap'};
225
226	58					112	my $add = $self->html_h_level;
227	58	100				121	return unless defined $add;
228	1	50	50			6	return if ($self->{'Adjusted_html_h_levels'}\|\|0) == $add;
229
230	1					2	$add -= 1;
231	1					4	for (1 .. 6) {
232	6					42	$Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
	6					23
233	6					35	$Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
	6					17
234							}
235							}
236
237							sub batch_mode_page_object_init {
238	10			10	0	23	my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
239	10					25	DEBUG and print STDERR "Initting $self\n for $module\n",
240							" in $infile\n out $outfile\n depth $depth\n";
241	10					32	$self->batch_mode(1);
242	10					27	$self->batch_mode_current_level($depth);
243	10					18	return $self;
244							}
245
246							sub run {
247	59			59	0	73	my $self = $_[0];
248	59	100				129	return $self->do_middle if $self->bare_output;
249							return
250	18		100			37	$self->do_beginning && $self->do_middle && $self->do_end;
251							}
252
253							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
254
255							sub do_beginning {
256	18			18	0	22	my $self = $_[0];
257
258	18					26	my $title;
259
260	18	50				43	if(defined $self->force_title) {
261	0					0	$title = $self->force_title;
262	0					0	DEBUG and print STDERR "Forcing title to be $title\n";
263							} else {
264							# Actually try looking for the title in the document:
265	18					72	$title = $self->get_short_title();
266	18	100				64	unless($self->content_seen) {
267	1					2	DEBUG and print STDERR "No content seen in search for title.\n";
268	1					4	return;
269							}
270	17					33	$self->{'Title'} = $title;
271
272	17	100	66			75	if(defined $title and $title =~ m/\S/) {
273	14					34	$title = $self->title_prefix . esc($title) . $self->title_postfix;
274							} else {
275	3					8	$title = $self->default_title;
276	3	50				8	$title = '' unless defined $title;
277	3					4	DEBUG and print STDERR "Title defaults to $title\n";
278							}
279							}
280
281
282	17		50			38	my $after = $self->html_header_after_title \|\| '';
283	17	100				70	if($self->html_css) {
284	10	50				19	my $link =
285							$self->html_css =~ m/
286							? $self->html_css # It's a big blob of markup, let's drop it in
287							: sprintf( # It's just a URL, so let's wrap it up
288							qq[\n],
289							$self->html_css,
290							);
291	10					95	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
292							}
293	17					53	$self->_add_top_anchor(\$after);
294
295	17	100				37	if($self->html_javascript) {
296	10	50				17	my $link =
297							$self->html_javascript =~ m/
298							? $self->html_javascript # It's a big blob of markup, let's drop it in
299							: sprintf( # It's just a URL, so let's wrap it up
300							qq[\n],
301							$self->html_javascript,
302							);
303	10					104	$after =~ s{()}{$link\n$1}i; # otherwise nevermind
304							}
305
306	17		50			26	print {$self->{'output_fh'}}
	17					42
307							$self->html_header_before_title \|\| '',
308							$title, # already escaped
309							$after,
310							;
311
312	17					24	DEBUG and print STDERR "Returning from do_beginning...\n";
313	17					69	return 1;
314							}
315
316							sub _add_top_anchor {
317	17			17		29	my($self, $text_r) = @_;
318	17	100	66			108	unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
319	7		50			24	$$text_r .= $self->top_anchor \|\| '';
320							}
321	17					36	return;
322							}
323
324							sub version_tag_comment {
325	59			59	0	76	my $self = shift;
326							return sprintf
327							"\n",
328							esc(
329							ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
330	59		33			1325	$], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} \|\| time)),
331							), $self->_modnote(),
332							;
333							}
334
335							sub _modnote {
336	59		33	59		137	my $class = ref($_[0]) \|\| $_[0];
337	59					1206	return join "\n " => grep m/\S/, split "\n",
338
339							qq{
340							If you want to change this HTML document, you probably shouldn't do that
341							by changing it directly. Instead, see about changing the calling options
342							to $class, and/or subclassing $class,
343							then reconverting this document from the Pod source.
344							When in doubt, email the author of $class for advice.
345							See 'perldoc $class' for more info.
346							};
347
348							}
349
350							sub do_end {
351	17			17	0	33	my $self = $_[0];
352	17		50			18	print {$self->{'output_fh'}} $self->html_footer \|\| '';
	17					67
353	17					67	return 1;
354							}
355
356							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
357							# Normally this would just be a call to _do_middle_main_loop -- but we
358							# have to do some elaborate things to emit all the content and then
359							# summarize it and output it /before/ the content that it's a summary of.
360
361							sub do_middle {
362	58			58	0	81	my $self = $_[0];
363	58	100				128	return $self->_do_middle_main_loop unless $self->index;
364
365	10	50				27	if( $self->output_string ) {
366							# An efficiency hack
367	0					0	my $out = $self->output_string; #it's a reference to it
368	0					0	my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
369	0					0	$$out .= $sneakytag;
370	0					0	$self->_do_middle_main_loop;
371	0					0	$sneakytag = quotemeta($sneakytag);
372	0					0	my $index = $self->index_as_html();
373	0	0				0	if( $$out =~ s/$sneakytag/$index/s ) {
374							# Expected case
375	0					0	DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
376							} else {
377	0					0	DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
378							# I don't think this should ever happen.
379							}
380	0					0	return 1;
381							}
382
383	10	50				20	unless( $self->output_fh ) {
384	0					0	require Carp;
385	0					0	Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
386							}
387
388							# If we get here, we're outputting to a FH. So we need to do some magic.
389							# Namely, divert all content to a string, which we output after the index.
390	10					16	my $fh = $self->output_fh;
391	10					17	my $content = '';
392							{
393							# Our horrible bait and switch:
394	10					18	$self->output_string( \$content );
	10					25
395	10					31	$self->_do_middle_main_loop;
396	10					28	$self->abandon_output_string();
397	10					19	$self->output_fh($fh);
398							}
399	10					23	print $fh $self->index_as_html();
400	10					22	print $fh $content;
401
402	10					39	return 1;
403							}
404
405							###########################################################################
406
407							sub index_as_html {
408	10			10	0	12	my $self = $_[0];
409							# This is meant to be called AFTER the input document has been parsed!
410
411	10		50			20	my $points = $self->{'PSHTML_index_points'} \|\| [];
412
413	10	100				26	@$points > 1 or return qq[ \n];
414							# There's no point in having a 0-item or 1-item index, I dare say.
415
416	8					15	my(@out) = qq{\n };
417	8					10	my $level = 0;
418
419	8					12	my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
420	8					22	foreach my $p (@$points, ['head0', '(end)']) {
421	26					50	($tagname, $text) = @$p;
422	26					42	$anchorname = $self->section_escape($text);
423	26	50				80	if( $tagname =~ m{^head(\d+)$} ) {
424	26					51	$target_level = 0 + $1;
425							} else { # must be some kinda list item
426	0	0				0	if($previous_tagname =~ m{^head\d+$} ) {
427	0					0	$target_level = $level + 1;
428							} else {
429	0					0	$target_level = $level; # no change needed
430							}
431							}
432
433							# Get to target_level by opening or closing ULs
434	26					44	while($level > $target_level)
435	9					13	{ --$level; push @out, (" " x $level) . ""; }
	9					22
436	26					46	while($level < $target_level)
437	9					11	{ ++$level; push @out, (" " x ($level-1))
	9					36
438							. " "; }
439
440	26					33	$previous_tagname = $tagname;
441	26	100				43	next unless $level;
442
443	18					22	$indent = ' ' x $level;
444	18					38	push @out, sprintf
445							"%s %s",
446							$indent, $level, esc($anchorname), esc($text)
447							;
448							}
449	8					20	push @out, "\n";
450	8					36	return join "\n", @out;
451							}
452
453							###########################################################################
454
455							sub _do_middle_main_loop {
456	58			58		87	my $self = $_[0];
457	58					76	my $fh = $self->{'output_fh'};
458	58					71	my $tagmap = $self->{'Tagmap'};
459
460	58					130	$self->__adjust_html_h_levels;
461
462	58					113	my($token, $type, $tagname, $linkto, $linktype);
463	58					0	my @stack;
464	58					78	my $dont_wrap = 0;
465
466	58					136	while($token = $self->get_token) {
467
468							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
469	515	100				952	if( ($type = $token->type) eq 'start' ) {
		100
		50
470	200	100	100			355	if(($tagname = $token->tagname) eq 'L') {
		100
		100
471	22		50			75	$linktype = $token->attr('type') \|\| 'insane';
472
473	22					51	$linkto = $self->do_link($token);
474
475	22	50	33			73	if(defined $linkto and length $linkto) {
476	22					65	esc($linkto);
477							# (Yes, SGML-escaping applies on top of %-escaping!
478							# But it's rarely noticeable in practice.)
479	22					69	print $fh qq{};
480							} else {
481	0					0	print $fh ""; # Yes, an 'a' element with no attributes!
482							}
483
484							} elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
485	37		50			143	print $fh $tagmap->{$tagname} \|\| next;
486
487	37					45	my @to_unget;
488	37					37	while(1) {
489	85					152	push @to_unget, $self->get_token;
490	85	100	100			195	last if $to_unget[-1]->is_end
491							and $to_unget[-1]->tagname eq $tagname;
492
493							# TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
494							}
495
496	37					95	my $name = $self->linearize_tokens(@to_unget);
497	37	50				108	$name = $self->do_section($name, $token) if defined $name;
498
499	37					84	print $fh "
500	37	100				114	if ($tagname =~ m/^head\d$/s) {
501	33	100				80	print $fh "class='u'", $self->index
502							? " href='#___top' title='click to go to top of document'\n"
503							: "\n";
504							}
505
506	37	50				57	if(defined $name) {
507	37					78	my $esc = esc( $self->section_name_tidy( $name ) );
508	37					114	print $fh qq[name="$esc"];
509	37					44	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
510							" tokens as \"$name\".\n";
511	31					98	push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
512	37	100				90	if $ToIndex{ $tagname };
513							# Obviously, this discards all formatting codes (saving
514							# just their content), but ahwell.
515
516							} else { # ludicrously long, so nevermind
517	0					0	DEBUG and print STDERR "Linearized ", scalar(@to_unget),
518							" tokens, but it was too long, so nevermind.\n";
519							}
520	37					83	print $fh "\n>";
521	37					99	$self->unget_token(@to_unget);
522
523							} elsif ($tagname eq 'Data') {
524	4					8	my $next = $self->get_token;
525	4	50				7	next unless defined $next;
526	4	50				8	unless( $next->type eq 'text' ) {
527	0					0	$self->unget_token($next);
528	0					0	next;
529							}
530	4					6	DEBUG and print STDERR " raw text ", $next->text, "\n";
531							# The parser sometimes preserves newlines and sometimes doesn't!
532	4					5	(my $text = $next->text) =~ s/\n\z//;
533	4					22	print $fh $text, "\n";
534	4					13	next;
535
536							} else {
537	137	100	33			455	if( $tagname =~ m/^over-/s ) {
		50	33
538	3					10	push @stack, '';
539							} elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
540	0					0	print $fh $stack[-1];
541	0					0	$stack[-1] = '';
542							}
543	137		100			471	print $fh $tagmap->{$tagname} \|\| next;
544	78	100	66			383	++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
			100
545							or $tagname eq 'X';
546							}
547
548							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
549							} elsif( $type eq 'end' ) {
550	200	100	66			378	if( ($tagname = $token->tagname) =~ m/^over-/s ) {
		100
551	3	50				11	if( my $end = pop @stack ) {
552	3					8	print $fh $end;
553							}
554							} elsif( $tagname =~ m/^item-/s and @stack) {
555	4					12	$stack[-1] = $tagmap->{"/$tagname"};
556	4	50	33			30	if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
557	4					11	$self->unget_token($next);
558	4	100				10	if( $next->type eq 'start' ) {
559	3					10	print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
560	3					7	$stack[-1] = $tagmap->{"/item-body"};
561							}
562							}
563	4					19	next;
564							}
565	196		100			626	print $fh $tagmap->{"/$tagname"} \|\| next;
566	137	100	66			498	--$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
567
568							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
569							} elsif( $type eq 'text' ) {
570	115					211	esc($type = $token->text); # reuse $type, why not
571	115	100				323	$type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
572	115					247	print $fh $type;
573							}
574
575							}
576	58					189	return 1;
577							}
578
579							###########################################################################
580							#
581
582							sub do_section {
583	36			36	0	73	my($self, $name, $token) = @_;
584	36					51	return $name;
585							}
586
587							sub do_link {
588	22			22	0	35	my($self, $token) = @_;
589	22					39	my $type = $token->attr('type');
590	22	50				63	if(!defined $type) {
		100
		100
		50
591	0					0	$self->whine("Typeless L!?", $token->attr('start_line'));
592	10					24	} elsif( $type eq 'pod') { return $self->do_pod_link($token);
593	9					22	} elsif( $type eq 'url') { return $self->do_url_link($token);
594	3					10	} elsif( $type eq 'man') { return $self->do_man_link($token);
595							} else {
596	0					0	$self->whine("L of unknown type $type!?", $token->attr('start_line'));
597							}
598	0					0	return 'FNORG'; # should never get called
599							}
600
601							# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
602
603	9			9	0	16	sub do_url_link { return $_[1]->attr('to') }
604
605							sub do_man_link {
606	3			3	0	8	my ($self, $link) = @_;
607	3					5	my $to = $link->attr('to');
608	3					7	my $frag = $link->attr('section');
609
610	3	50	33			22	return undef unless defined $to and length $to; # should never happen
611
612	3	100	66			13	$frag = $self->section_escape($frag)
613							if defined $frag and length($frag .= ''); # (stringify)
614
615	3					4	DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
616
617	3					10	return $self->resolve_man_page_link($to, $frag);
618							}
619
620
621							sub do_pod_link {
622							# And now things get really messy...
623	10			10	0	19	my($self, $link) = @_;
624	10					19	my $to = $link->attr('to');
625	10					19	my $section = $link->attr('section');
626							return undef unless( # should never happen
627	10	50	66			44	(defined $to and length $to) or
			33
			66
628							(defined $section and length $section)
629							);
630
631	10	100	66			32	$section = $self->section_escape($section)
632							if defined $section and length($section .= ''); # (stringify)
633
634	10					12	DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
635							$to \|\| "(nil)", $section \|\| "(nil)";
636
637							{
638							# An early hack:
639	10					12	my $complete_url = $self->resolve_pod_link_by_table($to, $section);
	10					30
640	10	50				17	if( $complete_url ) {
641	0					0	DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
642							$complete_url, "\n (Returning that.)\n";
643	0					0	return $complete_url;
644							} else {
645	10					14	DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
646							" didn't return anything interesting.\n";
647							}
648							}
649
650	10	100	66			33	if(defined $to and length $to) {
651							# Give this routine first hack again
652	7					11	my $there = $self->resolve_pod_link_by_table($to);
653	7	50	33			18	if(defined $there and length $there) {
654	0					0	DEBUG > 1
655							and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
656							} else {
657	7					17	$there =
658							$self->resolve_pod_page_link($to, $section);
659							# (I pass it the section value, but I don't see a
660							# particular reason it'd use it.)
661	7					10	DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there \|\| "(nil)", "\n";
662	7	50	33			24	unless( defined $there and length $there ) {
663	0					0	DEBUG and print STDERR "Can't resolve $to\n";
664	0					0	return undef;
665							}
666							# resolve_pod_page_link returning undef is how it
667							# can signal that it gives up on making a link
668							}
669	7					11	$to = $there;
670							}
671
672							#DEBUG and print STDERR "So far [", $to\|\|'nil', "] [", $section\|\|'nil', "]\n";
673
674	10	100	66			31	my $out = (defined $to and length $to) ? $to : '';
675	10	100	66			32	$out .= "#" . $section if defined $section and length $section;
676
677	10	50				17	unless(length $out) { # sanity check
678	0					0	DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
679							$to \|\| "(nil)", $section \|\| "(nil)";
680	0					0	return undef;
681							}
682
683	10					11	DEBUG and print STDERR "Resolved to $out\n";
684	10					25	return $out;
685							}
686
687
688							# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
689
690							sub section_escape {
691	32			32	0	46	my($self, $section) = @_;
692	32					54	return $self->section_url_escape(
693							$self->section_name_tidy($section)
694							);
695							}
696
697							sub section_name_tidy {
698	69			69	0	123	my($self, $section) = @_;
699	69					156	$section =~ s/^\s+//;
700	69					130	$section =~ s/\s+$//;
701	69					109	$section =~ tr/ /_/;
702	69	50				270	if ($] ge 5.006) {
703	69					134	$section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
704							} elsif ('A' eq chr(65)) { # But not on early EBCDIC
705	0					0	$section =~ tr/\x00-\x1F\x80-\x9F//d;
706							}
707	69					131	$section = $self->unicode_escape_url($section);
708	69	50				133	$section = '_' unless length $section;
709	69					134	return $section;
710							}
711
712	32			32	0	58	sub section_url_escape { shift->general_url_escape(@_) }
713	7			7	0	18	sub pagepath_url_escape { shift->general_url_escape(@_) }
714	3			3	0	7	sub manpage_url_escape { shift->general_url_escape(@_) }
715
716							sub general_url_escape {
717	42			42	0	62	my($self, $string) = @_;
718
719	42					67	$string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
	0					0
720							# express Unicode things as urlencode(utf(orig)).
721
722							# A pretty conservative escaping, behoovey even for query components
723							# of a URL (see RFC 2396)
724
725	42	50				132	if ($] ge 5.007_003) {
726	42					67	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
	8					39
727							} else { # Is broken for non-ASCII platforms on early perls
728	0					0	$string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
	0					0
729							}
730							# Yes, stipulate the list without a range, so that this can work right on
731							# all charsets that this module happens to run under.
732
733	42					79	return $string;
734							}
735
736							#--------------------------------------------------------------------------
737							#
738							# Oh look, a yawning portal to Hell! Let's play touch football right by it!
739							#
740
741							sub resolve_pod_page_link {
742							# resolve_pod_page_link must return a properly escaped URL
743	7			7	0	12	my $self = shift;
744	7	50				20	return $self->batch_mode()
745							? $self->resolve_pod_page_link_batch_mode(@_)
746							: $self->resolve_pod_page_link_singleton_mode(@_)
747							;
748							}
749
750							sub resolve_pod_page_link_singleton_mode {
751	7			7	0	26	my($self, $it) = @_;
752	7	50	33			21	return undef unless defined $it and length $it;
753	7					22	my $url = $self->pagepath_url_escape($it);
754
755	7					16	$url =~ s{::$}{}s; # probably never comes up anyway
756	7	50				17	$url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
757
758	7	50				19	return undef unless length $url;
759	7					15	return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
760							}
761
762							sub resolve_pod_page_link_batch_mode {
763	0			0	0	0	my($self, $to) = @_;
764	0					0	DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
765	0					0	my @path = grep length($_), split m/::/s, $to, -1;
766	0	0				0	unless( @path ) { # sanity
767	0					0	DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
768	0					0	return undef;
769							}
770	0					0	$self->batch_mode_rectify_path(\@path);
771	0					0	my $out = join('/', map $self->pagepath_url_escape($_), @path)
772							. $HTML_EXTENSION;
773	0					0	DEBUG > 1 and print STDERR " => $out\n";
774	0					0	return $out;
775							}
776
777							sub batch_mode_rectify_path {
778	0			0	0	0	my($self, $pathbits) = @_;
779	0					0	my $level = $self->batch_mode_current_level;
780	0					0	$level--; # how many levels up to go to get to the root
781	0	0				0	if($level < 1) {
782	0					0	unshift @$pathbits, '.'; # just to be pretty
783							} else {
784	0					0	unshift @$pathbits, ('..') x $level;
785							}
786	0					0	return;
787							}
788
789							sub resolve_man_page_link {
790	3			3	0	9	my ($self, $to, $frag) = @_;
791	3					7	my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
792
793	3	50	33			16	return undef unless defined $page and length $page;
794	3		50			6	$section \|\|= 1;
795
796	3					10	return $self->man_url_prefix . "$section/"
797							. $self->manpage_url_escape($page)
798							. $self->man_url_postfix;
799							}
800
801							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
802
803							sub resolve_pod_link_by_table {
804							# A crazy hack to allow specifying custom L => URL mappings
805
806	17	50		17	0	47	return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
807
808	0					0	my($self, $to, $section) = @_;
809
810							# TODO: add a method that actually populates podhtml_LOT from a file?
811
812	0	0				0	if(defined $section) {
813	0	0	0			0	$to = '' unless defined $to and length $to;
814	0					0	return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
815							} else {
816	0					0	return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
817							}
818	0					0	return;
819							}
820
821							###########################################################################
822
823							sub linearize_tokens { # self, tokens
824	37			37	0	45	my $self = shift;
825	37					54	my $out = '';
826
827	37					40	my $t;
828	37					70	while($t = shift @_) {
829	79	50	33			376	if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
		100	66
		100
830	0					0	$out .= $t; # a string, or some insane thing
831							} elsif($t->is_text) {
832	39					159	$out .= $t->text;
833							} elsif($t->is_start and $t->tag eq 'X') {
834							# Ignore until the end of this X<...> sequence:
835	3					5	my $x_open = 1;
836	3					5	while($x_open) {
837	6	100				9	next if( ($t = shift @_)->is_text );
838	3	50	33			8	if( $t->is_start and $t->tag eq 'X') { ++$x_open }
	0	50	33			0
839	3					12	elsif($t->is_end and $t->tag eq 'X') { --$x_open }
840							}
841							}
842							}
843	37	50				72	return undef if length $out > $Linearization_Limit;
844	37					117	return $out;
845							}
846
847							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
848
849							sub unicode_escape_url {
850	69			69	0	108	my($self, $string) = @_;
851	69					100	$string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
	0					0
852							# Turn char 1234 into "(1234)"
853	69					106	return $string;
854							}
855
856							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
857							sub esc { # a function.
858	462	100		462	0	801	if(defined wantarray) {
859	325	100				430	if(wantarray) {
860	274					563	@_ = splice @_; # break aliasing
861							} else {
862	51					80	my $x = shift;
863	51	50				156	if ($] ge 5.007_003) {
864	51					83	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
	0					0
865							} else { # Is broken for non-ASCII platforms on early perls
866	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
	0					0
867							}
868	51					125	return $x;
869							}
870							}
871	411					612	foreach my $x (@_) {
872							# Escape things very cautiously:
873	1094	50				1516	if (defined $x) {
874	1094	50				2445	if ($] ge 5.007_003) {
875	1094					1866	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
	52					156
876							} else { # Is broken for non-ASCII platforms on early perls
877	0					0	$x =~ s/([^-\n\t !\#\$\%\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
	0					0
878							}
879							}
880							# Leave out "- so that "--" won't make it thru in X-generated comments
881							# with text in them.
882
883							# Yes, stipulate the list without a range, so that this can work right on
884							# all charsets that this module happens to run under.
885							}
886	411					1092	return @_;
887							}
888
889							#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
890
891							1;
892							__END__

File Coverage

", # And also stick in an

", # ''

", # ''

", # ''

", # ''

", # ''