File Coverage

lib/Pod/Simple/HTML.pm
Criterion Covered Total %
statement 288 359 80.2
branch 121 178 67.9
condition 66 121 54.5
subroutine 36 39 92.3
pod 1 31 3.2
total 512 728 70.3


line stmt bran cond sub pod time code
1             require 5;
2             package Pod::Simple::HTML;
3 7     7   17754 use strict;
  7         27  
  7         183  
4 7     7   3228 use Pod::Simple::PullParser ();
  7         15  
  7         232  
5 7         1115 use vars qw(
6             @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
7             $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix
8             $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
9             $Doctype_decl $Content_decl
10 7     7   37 );
  7         11  
11             @ISA = ('Pod::Simple::PullParser');
12             $VERSION = '3.43';
13             BEGIN {
14 7 50   7   51 if(defined &DEBUG) { } # no-op
    50          
15 7         34041 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
16 0         0 else { *DEBUG = sub () {0}; }
17             }
18              
19             $Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
20             # qq{
21             # "http://www.w3.org/TR/html4/loose.dtd">\n};
22              
23             $Content_decl ||=
24             q{};
25              
26             $HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
27             $Computerese = "" unless defined $Computerese;
28             $LamePad = '' unless defined $LamePad;
29              
30             $Linearization_Limit = 120 unless defined $Linearization_Limit;
31             # headings/items longer than that won't get an
32             $Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
33             unless defined $Perldoc_URL_Prefix;
34             $Perldoc_URL_Postfix = ''
35             unless defined $Perldoc_URL_Postfix;
36              
37              
38             $Man_URL_Prefix = 'http://man.he.net/man';
39             $Man_URL_Postfix = '';
40              
41             $Title_Prefix = '' unless defined $Title_Prefix;
42             $Title_Postfix = '' unless defined $Title_Postfix;
43             %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
44             # 'item-text' stuff in the index doesn't quite work, and may
45             # not be a good idea anyhow.
46              
47              
48             __PACKAGE__->_accessorize(
49             'perldoc_url_prefix',
50             # In turning L into http://whatever/Foo%3a%3aBar, what
51             # to put before the "Foo%3a%3aBar".
52             # (for singleton mode only?)
53             'perldoc_url_postfix',
54             # what to put after "Foo%3a%3aBar" in the URL. Normally "".
55              
56             'man_url_prefix',
57             # In turning L into http://whatever/man/1/crontab, what
58             # to put before the "1/crontab".
59             'man_url_postfix',
60             # what to put after the "1/crontab" in the URL. Normally "".
61              
62             'batch_mode', # whether we're in batch mode
63             'batch_mode_current_level',
64             # When in batch mode, how deep the current module is: 1 for "LWP",
65             # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
66            
67             'title_prefix', 'title_postfix',
68             # What to put before and after the title in the head.
69             # Should already be &-escaped
70              
71             'html_h_level',
72            
73             'html_header_before_title',
74             'html_header_after_title',
75             'html_footer',
76             'top_anchor',
77              
78             'index', # whether to add an index at the top of each page
79             # (actually it's a table-of-contents, but we'll call it an index,
80             # out of apparently longstanding habit)
81              
82             'html_css', # URL of CSS file to point to
83             'html_javascript', # URL of Javascript file to point to
84              
85             'force_title', # should already be &-escaped
86             'default_title', # should already be &-escaped
87             );
88              
89             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90             my @_to_accept;
91              
92             %Tagmap = (
93             'Verbatim' => "\n",
94             '/Verbatim' => "\n",
95             'VerbatimFormatted' => "\n",
96             '/VerbatimFormatted' => "\n",
97             'VerbatimB' => "",
98             '/VerbatimB' => "",
99             'VerbatimI' => "",
100             '/VerbatimI' => "",
101             'VerbatimBI' => "",
102             '/VerbatimBI' => "",
103              
104              
105             'Data' => "\n",
106             '/Data' => "\n",
107            
108             'head1' => "\n

", # And also stick in an

109             'head2' => "\n

", # ''

110             'head3' => "\n

", # ''

111             'head4' => "\n

", # ''

112             'head5' => "\n
", # ''
113             'head6' => "\n
", # ''
114             '/head1' => "\n",
115             '/head2' => "\n",
116             '/head3' => "\n",
117             '/head4' => "\n",
118             '/head5' => "\n",
119             '/head6' => "\n",
120              
121             'X' => "",
123              
124             changes(qw(
125             Para=p
126             B=b I=i
127             over-bullet=ul
128             over-number=ol
129             over-text=dl
130             over-block=blockquote
131             item-bullet=li
132             item-number=li
133             item-text=dt
134             )),
135             changes2(
136             map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
137             qw[
138             sample=samp
139             definition=dfn
140             keyboard=kbd
141             variable=var
142             citation=cite
143             abbreviation=abbr
144             acronym=acronym
145             subscript=sub
146             superscript=sup
147             big=big
148             small=small
149             underline=u
150             strikethrough=s
151             preformat=pre
152             teletype=tt
153             ] # no point in providing a way to get ..., I think
154             ),
155            
156             '/item-bullet' => "$LamePad\n",
157             '/item-number' => "$LamePad\n",
158             '/item-text' => "$LamePad\n",
159             'item-body' => "\n
",
160             '/item-body' => "\n",
161              
162              
163             'B' => "", '/B' => "",
164             'I' => "", '/I' => "",
165             'F' => "", '/F' => "",
166             'C' => "", '/C' => "",
167             'L' => "", # ideally never used!
168             '/L' => "",
169             );
170              
171             sub changes {
172 7 50   7 0 15 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  70         357  
173             ? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
174             } @_;
175             }
176             sub changes2 {
177 7 50   7 0 14 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  105         816  
178             ? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
179             } @_;
180             }
181              
182             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183 0     0 0 0 sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
  0         0  
184             # Just so we can run from the command line. No options.
185             # For that, use perldoc!
186             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
187              
188             sub new {
189 59     59 1 536 my $new = shift->SUPER::new(@_);
190             #$new->nix_X_codes(1);
191 59         207 $new->nbsp_for_S(1);
192 59         165 $new->accept_targets( 'html', 'HTML' );
193 59         153 $new->accept_codes('VerbatimFormatted');
194 59         162 $new->accept_codes(@_to_accept);
195 59         63 DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
196              
197 59         177 $new->perldoc_url_prefix( $Perldoc_URL_Prefix );
198 59         144 $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
199 59         133 $new->man_url_prefix( $Man_URL_Prefix );
200 59         148 $new->man_url_postfix( $Man_URL_Postfix );
201 59         148 $new->title_prefix( $Title_Prefix );
202 59         127 $new->title_postfix( $Title_Postfix );
203              
204 59         197 $new->html_header_before_title(
205             qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="206">206</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="207">207</a> </td> <td class="c3" > 59 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 148 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="208">208</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> "",
209             $Content_decl,
210             "\n",
211             $new->version_tag_comment,
212             "\n",
213             );
214 59         190 $new->html_footer( qq[\n\n\n\n] );
215 59         145 $new->top_anchor( "\n" );
216              
217 59         2350 $new->{'Tagmap'} = {%Tagmap};
218              
219 59         374 return $new;
220             }
221              
222             sub __adjust_html_h_levels {
223 58     58   101 my ($self) = @_;
224 58         85 my $Tagmap = $self->{'Tagmap'};
225              
226 58         112 my $add = $self->html_h_level;
227 58 100       121 return unless defined $add;
228 1 50 50     6 return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
229              
230 1         2 $add -= 1;
231 1         4 for (1 .. 6) {
232 6         42 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
  6         23  
233 6         35 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
  6         17  
234             }
235             }
236              
237             sub batch_mode_page_object_init {
238 10     10 0 23 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
239 10         25 DEBUG and print STDERR "Initting $self\n for $module\n",
240             " in $infile\n out $outfile\n depth $depth\n";
241 10         32 $self->batch_mode(1);
242 10         27 $self->batch_mode_current_level($depth);
243 10         18 return $self;
244             }
245              
246             sub run {
247 59     59 0 73 my $self = $_[0];
248 59 100       129 return $self->do_middle if $self->bare_output;
249             return
250 18   100     37 $self->do_beginning && $self->do_middle && $self->do_end;
251             }
252              
253             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
254              
255             sub do_beginning {
256 18     18 0 22 my $self = $_[0];
257              
258 18         26 my $title;
259            
260 18 50       43 if(defined $self->force_title) {
261 0         0 $title = $self->force_title;
262 0         0 DEBUG and print STDERR "Forcing title to be $title\n";
263             } else {
264             # Actually try looking for the title in the document:
265 18         72 $title = $self->get_short_title();
266 18 100       64 unless($self->content_seen) {
267 1         2 DEBUG and print STDERR "No content seen in search for title.\n";
268 1         4 return;
269             }
270 17         33 $self->{'Title'} = $title;
271              
272 17 100 66     75 if(defined $title and $title =~ m/\S/) {
273 14         34 $title = $self->title_prefix . esc($title) . $self->title_postfix;
274             } else {
275 3         8 $title = $self->default_title;
276 3 50       8 $title = '' unless defined $title;
277 3         4 DEBUG and print STDERR "Title defaults to $title\n";
278             }
279             }
280              
281            
282 17   50     38 my $after = $self->html_header_after_title || '';
283 17 100       70 if($self->html_css) {
284 10 50       19 my $link =
285             $self->html_css =~ m/
286             ? $self->html_css # It's a big blob of markup, let's drop it in
287             : sprintf( # It's just a URL, so let's wrap it up
288             qq[\n],
289             $self->html_css,
290             );
291 10         95 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
292             }
293 17         53 $self->_add_top_anchor(\$after);
294              
295 17 100       37 if($self->html_javascript) {
296 10 50       17 my $link =
297             $self->html_javascript =~ m/
298             ? $self->html_javascript # It's a big blob of markup, let's drop it in
299             : sprintf( # It's just a URL, so let's wrap it up
300             qq[\n],
301             $self->html_javascript,
302             );
303 10         104 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
304             }
305              
306 17   50     26 print {$self->{'output_fh'}}
  17         42  
307             $self->html_header_before_title || '',
308             $title, # already escaped
309             $after,
310             ;
311              
312 17         24 DEBUG and print STDERR "Returning from do_beginning...\n";
313 17         69 return 1;
314             }
315              
316             sub _add_top_anchor {
317 17     17   29 my($self, $text_r) = @_;
318 17 100 66     108 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
319 7   50     24 $$text_r .= $self->top_anchor || '';
320             }
321 17         36 return;
322             }
323              
324             sub version_tag_comment {
325 59     59 0 76 my $self = shift;
326             return sprintf
327             "\n",
328             esc(
329             ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
330 59   33     1325 $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)),
331             ), $self->_modnote(),
332             ;
333             }
334              
335             sub _modnote {
336 59   33 59   137 my $class = ref($_[0]) || $_[0];
337 59         1206 return join "\n " => grep m/\S/, split "\n",
338              
339             qq{
340             If you want to change this HTML document, you probably shouldn't do that
341             by changing it directly. Instead, see about changing the calling options
342             to $class, and/or subclassing $class,
343             then reconverting this document from the Pod source.
344             When in doubt, email the author of $class for advice.
345             See 'perldoc $class' for more info.
346             };
347              
348             }
349              
350             sub do_end {
351 17     17 0 33 my $self = $_[0];
352 17   50     18 print {$self->{'output_fh'}} $self->html_footer || '';
  17         67  
353 17         67 return 1;
354             }
355              
356             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
357             # Normally this would just be a call to _do_middle_main_loop -- but we
358             # have to do some elaborate things to emit all the content and then
359             # summarize it and output it /before/ the content that it's a summary of.
360              
361             sub do_middle {
362 58     58 0 81 my $self = $_[0];
363 58 100       128 return $self->_do_middle_main_loop unless $self->index;
364              
365 10 50       27 if( $self->output_string ) {
366             # An efficiency hack
367 0         0 my $out = $self->output_string; #it's a reference to it
368 0         0 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
369 0         0 $$out .= $sneakytag;
370 0         0 $self->_do_middle_main_loop;
371 0         0 $sneakytag = quotemeta($sneakytag);
372 0         0 my $index = $self->index_as_html();
373 0 0       0 if( $$out =~ s/$sneakytag/$index/s ) {
374             # Expected case
375 0         0 DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
376             } else {
377 0         0 DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
378             # I don't think this should ever happen.
379             }
380 0         0 return 1;
381             }
382              
383 10 50       20 unless( $self->output_fh ) {
384 0         0 require Carp;
385 0         0 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
386             }
387              
388             # If we get here, we're outputting to a FH. So we need to do some magic.
389             # Namely, divert all content to a string, which we output after the index.
390 10         16 my $fh = $self->output_fh;
391 10         17 my $content = '';
392             {
393             # Our horrible bait and switch:
394 10         18 $self->output_string( \$content );
  10         25  
395 10         31 $self->_do_middle_main_loop;
396 10         28 $self->abandon_output_string();
397 10         19 $self->output_fh($fh);
398             }
399 10         23 print $fh $self->index_as_html();
400 10         22 print $fh $content;
401              
402 10         39 return 1;
403             }
404              
405             ###########################################################################
406              
407             sub index_as_html {
408 10     10 0 12 my $self = $_[0];
409             # This is meant to be called AFTER the input document has been parsed!
410              
411 10   50     20 my $points = $self->{'PSHTML_index_points'} || [];
412            
413 10 100       26 @$points > 1 or return qq[
\n];
414             # There's no point in having a 0-item or 1-item index, I dare say.
415            
416 8         15 my(@out) = qq{\n
};
417 8         10 my $level = 0;
418              
419 8         12 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
420 8         22 foreach my $p (@$points, ['head0', '(end)']) {
421 26         50 ($tagname, $text) = @$p;
422 26         42 $anchorname = $self->section_escape($text);
423 26 50       80 if( $tagname =~ m{^head(\d+)$} ) {
424 26         51 $target_level = 0 + $1;
425             } else { # must be some kinda list item
426 0 0       0 if($previous_tagname =~ m{^head\d+$} ) {
427 0         0 $target_level = $level + 1;
428             } else {
429 0         0 $target_level = $level; # no change needed
430             }
431             }
432            
433             # Get to target_level by opening or closing ULs
434 26         44 while($level > $target_level)
435 9         13 { --$level; push @out, (" " x $level) . ""; }
  9         22  
436 26         46 while($level < $target_level)
437 9         11 { ++$level; push @out, (" " x ($level-1))
  9         36  
438             . "
    "; }
439              
440 26         33 $previous_tagname = $tagname;
441 26 100       43 next unless $level;
442            
443 18         22 $indent = ' ' x $level;
444 18         38 push @out, sprintf
445             "%s
  • %s",
  • 446             $indent, $level, esc($anchorname), esc($text)
    447             ;
    448             }
    449 8         20 push @out, "\n";
    450 8         36 return join "\n", @out;
    451             }
    452              
    453             ###########################################################################
    454              
    455             sub _do_middle_main_loop {
    456 58     58   87 my $self = $_[0];
    457 58         76 my $fh = $self->{'output_fh'};
    458 58         71 my $tagmap = $self->{'Tagmap'};
    459              
    460 58         130 $self->__adjust_html_h_levels;
    461            
    462 58         113 my($token, $type, $tagname, $linkto, $linktype);
    463 58         0 my @stack;
    464 58         78 my $dont_wrap = 0;
    465              
    466 58         136 while($token = $self->get_token) {
    467              
    468             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    469 515 100       952 if( ($type = $token->type) eq 'start' ) {
        100          
        50          
    470 200 100 100     355 if(($tagname = $token->tagname) eq 'L') {
        100          
        100          
    471 22   50     75 $linktype = $token->attr('type') || 'insane';
    472            
    473 22         51 $linkto = $self->do_link($token);
    474              
    475 22 50 33     73 if(defined $linkto and length $linkto) {
    476 22         65 esc($linkto);
    477             # (Yes, SGML-escaping applies on top of %-escaping!
    478             # But it's rarely noticeable in practice.)
    479 22         69 print $fh qq{};
    480             } else {
    481 0         0 print $fh ""; # Yes, an 'a' element with no attributes!
    482             }
    483              
    484             } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
    485 37   50     143 print $fh $tagmap->{$tagname} || next;
    486              
    487 37         45 my @to_unget;
    488 37         37 while(1) {
    489 85         152 push @to_unget, $self->get_token;
    490 85 100 100     195 last if $to_unget[-1]->is_end
    491             and $to_unget[-1]->tagname eq $tagname;
    492            
    493             # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
    494             }
    495              
    496 37         95 my $name = $self->linearize_tokens(@to_unget);
    497 37 50       108 $name = $self->do_section($name, $token) if defined $name;
    498              
    499 37         84 print $fh "
    500 37 100       114 if ($tagname =~ m/^head\d$/s) {
    501 33 100       80 print $fh "class='u'", $self->index
    502             ? " href='#___top' title='click to go to top of document'\n"
    503             : "\n";
    504             }
    505            
    506 37 50       57 if(defined $name) {
    507 37         78 my $esc = esc( $self->section_name_tidy( $name ) );
    508 37         114 print $fh qq[name="$esc"];
    509 37         44 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    510             " tokens as \"$name\".\n";
    511 31         98 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
    512 37 100       90 if $ToIndex{ $tagname };
    513             # Obviously, this discards all formatting codes (saving
    514             # just their content), but ahwell.
    515            
    516             } else { # ludicrously long, so nevermind
    517 0         0 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    518             " tokens, but it was too long, so nevermind.\n";
    519             }
    520 37         83 print $fh "\n>";
    521 37         99 $self->unget_token(@to_unget);
    522              
    523             } elsif ($tagname eq 'Data') {
    524 4         8 my $next = $self->get_token;
    525 4 50       7 next unless defined $next;
    526 4 50       8 unless( $next->type eq 'text' ) {
    527 0         0 $self->unget_token($next);
    528 0         0 next;
    529             }
    530 4         6 DEBUG and print STDERR " raw text ", $next->text, "\n";
    531             # The parser sometimes preserves newlines and sometimes doesn't!
    532 4         5 (my $text = $next->text) =~ s/\n\z//;
    533 4         22 print $fh $text, "\n";
    534 4         13 next;
    535            
    536             } else {
    537 137 100 33     455 if( $tagname =~ m/^over-/s ) {
        50 33        
    538 3         10 push @stack, '';
    539             } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
    540 0         0 print $fh $stack[-1];
    541 0         0 $stack[-1] = '';
    542             }
    543 137   100     471 print $fh $tagmap->{$tagname} || next;
    544 78 100 66     383 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
          100        
    545             or $tagname eq 'X';
    546             }
    547              
    548             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    549             } elsif( $type eq 'end' ) {
    550 200 100 66     378 if( ($tagname = $token->tagname) =~ m/^over-/s ) {
        100          
    551 3 50       11 if( my $end = pop @stack ) {
    552 3         8 print $fh $end;
    553             }
    554             } elsif( $tagname =~ m/^item-/s and @stack) {
    555 4         12 $stack[-1] = $tagmap->{"/$tagname"};
    556 4 50 33     30 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
    557 4         11 $self->unget_token($next);
    558 4 100       10 if( $next->type eq 'start' ) {
    559 3         10 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
    560 3         7 $stack[-1] = $tagmap->{"/item-body"};
    561             }
    562             }
    563 4         19 next;
    564             }
    565 196   100     626 print $fh $tagmap->{"/$tagname"} || next;
    566 137 100 66     498 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
    567              
    568             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    569             } elsif( $type eq 'text' ) {
    570 115         211 esc($type = $token->text); # reuse $type, why not
    571 115 100       323 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
    572 115         247 print $fh $type;
    573             }
    574              
    575             }
    576 58         189 return 1;
    577             }
    578              
    579             ###########################################################################
    580             #
    581              
    582             sub do_section {
    583 36     36 0 73 my($self, $name, $token) = @_;
    584 36         51 return $name;
    585             }
    586              
    587             sub do_link {
    588 22     22 0 35 my($self, $token) = @_;
    589 22         39 my $type = $token->attr('type');
    590 22 50       63 if(!defined $type) {
        100          
        100          
        50          
    591 0         0 $self->whine("Typeless L!?", $token->attr('start_line'));
    592 10         24 } elsif( $type eq 'pod') { return $self->do_pod_link($token);
    593 9         22 } elsif( $type eq 'url') { return $self->do_url_link($token);
    594 3         10 } elsif( $type eq 'man') { return $self->do_man_link($token);
    595             } else {
    596 0         0 $self->whine("L of unknown type $type!?", $token->attr('start_line'));
    597             }
    598 0         0 return 'FNORG'; # should never get called
    599             }
    600              
    601             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    602              
    603 9     9 0 16 sub do_url_link { return $_[1]->attr('to') }
    604              
    605             sub do_man_link {
    606 3     3 0 8 my ($self, $link) = @_;
    607 3         5 my $to = $link->attr('to');
    608 3         7 my $frag = $link->attr('section');
    609              
    610 3 50 33     22 return undef unless defined $to and length $to; # should never happen
    611              
    612 3 100 66     13 $frag = $self->section_escape($frag)
    613             if defined $frag and length($frag .= ''); # (stringify)
    614              
    615 3         4 DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
    616              
    617 3         10 return $self->resolve_man_page_link($to, $frag);
    618             }
    619              
    620              
    621             sub do_pod_link {
    622             # And now things get really messy...
    623 10     10 0 19 my($self, $link) = @_;
    624 10         19 my $to = $link->attr('to');
    625 10         19 my $section = $link->attr('section');
    626             return undef unless( # should never happen
    627 10 50 66     44 (defined $to and length $to) or
          33        
          66        
    628             (defined $section and length $section)
    629             );
    630              
    631 10 100 66     32 $section = $self->section_escape($section)
    632             if defined $section and length($section .= ''); # (stringify)
    633              
    634 10         12 DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
    635             $to || "(nil)", $section || "(nil)";
    636            
    637             {
    638             # An early hack:
    639 10         12 my $complete_url = $self->resolve_pod_link_by_table($to, $section);
      10         30  
    640 10 50       17 if( $complete_url ) {
    641 0         0 DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
    642             $complete_url, "\n (Returning that.)\n";
    643 0         0 return $complete_url;
    644             } else {
    645 10         14 DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
    646             " didn't return anything interesting.\n";
    647             }
    648             }
    649              
    650 10 100 66     33 if(defined $to and length $to) {
    651             # Give this routine first hack again
    652 7         11 my $there = $self->resolve_pod_link_by_table($to);
    653 7 50 33     18 if(defined $there and length $there) {
    654 0         0 DEBUG > 1
    655             and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
    656             } else {
    657 7         17 $there =
    658             $self->resolve_pod_page_link($to, $section);
    659             # (I pass it the section value, but I don't see a
    660             # particular reason it'd use it.)
    661 7         10 DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
    662 7 50 33     24 unless( defined $there and length $there ) {
    663 0         0 DEBUG and print STDERR "Can't resolve $to\n";
    664 0         0 return undef;
    665             }
    666             # resolve_pod_page_link returning undef is how it
    667             # can signal that it gives up on making a link
    668             }
    669 7         11 $to = $there;
    670             }
    671              
    672             #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
    673              
    674 10 100 66     31 my $out = (defined $to and length $to) ? $to : '';
    675 10 100 66     32 $out .= "#" . $section if defined $section and length $section;
    676            
    677 10 50       17 unless(length $out) { # sanity check
    678 0         0 DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
    679             $to || "(nil)", $section || "(nil)";
    680 0         0 return undef;
    681             }
    682              
    683 10         11 DEBUG and print STDERR "Resolved to $out\n";
    684 10         25 return $out;
    685             }
    686              
    687              
    688             # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    689              
    690             sub section_escape {
    691 32     32 0 46 my($self, $section) = @_;
    692 32         54 return $self->section_url_escape(
    693             $self->section_name_tidy($section)
    694             );
    695             }
    696              
    697             sub section_name_tidy {
    698 69     69 0 123 my($self, $section) = @_;
    699 69         156 $section =~ s/^\s+//;
    700 69         130 $section =~ s/\s+$//;
    701 69         109 $section =~ tr/ /_/;
    702 69 50       270 if ($] ge 5.006) {
    703 69         134 $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
    704             } elsif ('A' eq chr(65)) { # But not on early EBCDIC
    705 0         0 $section =~ tr/\x00-\x1F\x80-\x9F//d;
    706             }
    707 69         131 $section = $self->unicode_escape_url($section);
    708 69 50       133 $section = '_' unless length $section;
    709 69         134 return $section;
    710             }
    711              
    712 32     32 0 58 sub section_url_escape { shift->general_url_escape(@_) }
    713 7     7 0 18 sub pagepath_url_escape { shift->general_url_escape(@_) }
    714 3     3 0 7 sub manpage_url_escape { shift->general_url_escape(@_) }
    715              
    716             sub general_url_escape {
    717 42     42 0 62 my($self, $string) = @_;
    718            
    719 42         67 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
      0         0  
    720             # express Unicode things as urlencode(utf(orig)).
    721            
    722             # A pretty conservative escaping, behoovey even for query components
    723             # of a URL (see RFC 2396)
    724            
    725 42 50       132 if ($] ge 5.007_003) {
    726 42         67 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
      8         39  
    727             } else { # Is broken for non-ASCII platforms on early perls
    728 0         0 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
      0         0  
    729             }
    730             # Yes, stipulate the list without a range, so that this can work right on
    731             # all charsets that this module happens to run under.
    732            
    733 42         79 return $string;
    734             }
    735              
    736             #--------------------------------------------------------------------------
    737             #
    738             # Oh look, a yawning portal to Hell! Let's play touch football right by it!
    739             #
    740              
    741             sub resolve_pod_page_link {
    742             # resolve_pod_page_link must return a properly escaped URL
    743 7     7 0 12 my $self = shift;
    744 7 50       20 return $self->batch_mode()
    745             ? $self->resolve_pod_page_link_batch_mode(@_)
    746             : $self->resolve_pod_page_link_singleton_mode(@_)
    747             ;
    748             }
    749              
    750             sub resolve_pod_page_link_singleton_mode {
    751 7     7 0 26 my($self, $it) = @_;
    752 7 50 33     21 return undef unless defined $it and length $it;
    753 7         22 my $url = $self->pagepath_url_escape($it);
    754            
    755 7         16 $url =~ s{::$}{}s; # probably never comes up anyway
    756 7 50       17 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
    757            
    758 7 50       19 return undef unless length $url;
    759 7         15 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
    760             }
    761              
    762             sub resolve_pod_page_link_batch_mode {
    763 0     0 0 0 my($self, $to) = @_;
    764 0         0 DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
    765 0         0 my @path = grep length($_), split m/::/s, $to, -1;
    766 0 0       0 unless( @path ) { # sanity
    767 0         0 DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
    768 0         0 return undef;
    769             }
    770 0         0 $self->batch_mode_rectify_path(\@path);
    771 0         0 my $out = join('/', map $self->pagepath_url_escape($_), @path)
    772             . $HTML_EXTENSION;
    773 0         0 DEBUG > 1 and print STDERR " => $out\n";
    774 0         0 return $out;
    775             }
    776              
    777             sub batch_mode_rectify_path {
    778 0     0 0 0 my($self, $pathbits) = @_;
    779 0         0 my $level = $self->batch_mode_current_level;
    780 0         0 $level--; # how many levels up to go to get to the root
    781 0 0       0 if($level < 1) {
    782 0         0 unshift @$pathbits, '.'; # just to be pretty
    783             } else {
    784 0         0 unshift @$pathbits, ('..') x $level;
    785             }
    786 0         0 return;
    787             }
    788              
    789             sub resolve_man_page_link {
    790 3     3 0 9 my ($self, $to, $frag) = @_;
    791 3         7 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
    792              
    793 3 50 33     16 return undef unless defined $page and length $page;
    794 3   50     6 $section ||= 1;
    795              
    796 3         10 return $self->man_url_prefix . "$section/"
    797             . $self->manpage_url_escape($page)
    798             . $self->man_url_postfix;
    799             }
    800              
    801             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    802              
    803             sub resolve_pod_link_by_table {
    804             # A crazy hack to allow specifying custom L => URL mappings
    805              
    806 17 50   17 0 47 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
    807              
    808 0         0 my($self, $to, $section) = @_;
    809              
    810             # TODO: add a method that actually populates podhtml_LOT from a file?
    811              
    812 0 0       0 if(defined $section) {
    813 0 0 0     0 $to = '' unless defined $to and length $to;
    814 0         0 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
    815             } else {
    816 0         0 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
    817             }
    818 0         0 return;
    819             }
    820              
    821             ###########################################################################
    822              
    823             sub linearize_tokens { # self, tokens
    824 37     37 0 45 my $self = shift;
    825 37         54 my $out = '';
    826            
    827 37         40 my $t;
    828 37         70 while($t = shift @_) {
    829 79 50 33     376 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
        100 66        
        100          
    830 0         0 $out .= $t; # a string, or some insane thing
    831             } elsif($t->is_text) {
    832 39         159 $out .= $t->text;
    833             } elsif($t->is_start and $t->tag eq 'X') {
    834             # Ignore until the end of this X<...> sequence:
    835 3         5 my $x_open = 1;
    836 3         5 while($x_open) {
    837 6 100       9 next if( ($t = shift @_)->is_text );
    838 3 50 33     8 if( $t->is_start and $t->tag eq 'X') { ++$x_open }
      0 50 33     0  
    839 3         12 elsif($t->is_end and $t->tag eq 'X') { --$x_open }
    840             }
    841             }
    842             }
    843 37 50       72 return undef if length $out > $Linearization_Limit;
    844 37         117 return $out;
    845             }
    846              
    847             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    848              
    849             sub unicode_escape_url {
    850 69     69 0 108 my($self, $string) = @_;
    851 69         100 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
      0         0  
    852             # Turn char 1234 into "(1234)"
    853 69         106 return $string;
    854             }
    855              
    856             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    857             sub esc { # a function.
    858 462 100   462 0 801 if(defined wantarray) {
    859 325 100       430 if(wantarray) {
    860 274         563 @_ = splice @_; # break aliasing
    861             } else {
    862 51         80 my $x = shift;
    863 51 50       156 if ($] ge 5.007_003) {
    864 51         83 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
      0         0  
    865             } else { # Is broken for non-ASCII platforms on early perls
    866 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
      0         0  
    867             }
    868 51         125 return $x;
    869             }
    870             }
    871 411         612 foreach my $x (@_) {
    872             # Escape things very cautiously:
    873 1094 50       1516 if (defined $x) {
    874 1094 50       2445 if ($] ge 5.007_003) {
    875 1094         1866 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
      52         156  
    876             } else { # Is broken for non-ASCII platforms on early perls
    877 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
      0         0  
    878             }
    879             }
    880             # Leave out "- so that "--" won't make it thru in X-generated comments
    881             # with text in them.
    882              
    883             # Yes, stipulate the list without a range, so that this can work right on
    884             # all charsets that this module happens to run under.
    885             }
    886 411         1092 return @_;
    887             }
    888              
    889             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    890              
    891             1;
    892             __END__