File Coverage

blib/lib/Pod/Simple/HTML.pm
Criterion Covered Total %
statement 288 359 80.2
branch 121 178 67.9
condition 66 121 54.5
subroutine 36 39 92.3
pod 1 31 3.2
total 512 728 70.3


line stmt bran cond sub pod time code
1             package Pod::Simple::HTML;
2 7     7   211870 use strict;
  7         36  
  7         200  
3 7     7   39 use warnings;
  7         13  
  7         161  
4 7     7   3845 use Pod::Simple::PullParser ();
  7         25  
  7         619  
5             our @ISA = ('Pod::Simple::PullParser');
6             our $VERSION = '3.45';
7             BEGIN {
8 7 50   7   58 if(defined &DEBUG) { } # no-op
    50          
9 7         42516 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
10 0         0 else { *DEBUG = sub () {0}; }
11             }
12              
13             our $Doctype_decl ||= ''; # No. Just No. Don't even ask me for it.
14             # qq{
15             # "http://www.w3.org/TR/html4/loose.dtd">\n};
16              
17             our $Content_decl ||=
18             q{};
19              
20             our $HTML_EXTENSION;
21             $HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
22             our $Computerese;
23             $Computerese = "" unless defined $Computerese;
24             our $LamePad;
25             $LamePad = '' unless defined $LamePad;
26              
27             our $Linearization_Limit;
28             $Linearization_Limit = 120 unless defined $Linearization_Limit;
29             # headings/items longer than that won't get an
30             our $Perldoc_URL_Prefix;
31             $Perldoc_URL_Prefix = 'https://metacpan.org/pod/'
32             unless defined $Perldoc_URL_Prefix;
33             our $Perldoc_URL_Postfix;
34             $Perldoc_URL_Postfix = ''
35             unless defined $Perldoc_URL_Postfix;
36              
37              
38             our $Man_URL_Prefix = 'http://man.he.net/man';
39             our $Man_URL_Postfix = '';
40              
41             our $Title_Prefix;
42             $Title_Prefix = '' unless defined $Title_Prefix;
43             our $Title_Postfix;
44             $Title_Postfix = '' unless defined $Title_Postfix;
45             our %ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46             # 'item-text' stuff in the index doesn't quite work, and may
47             # not be a good idea anyhow.
48              
49              
50             __PACKAGE__->_accessorize(
51             'perldoc_url_prefix',
52             # In turning L into http://whatever/Foo%3a%3aBar, what
53             # to put before the "Foo%3a%3aBar".
54             # (for singleton mode only?)
55             'perldoc_url_postfix',
56             # what to put after "Foo%3a%3aBar" in the URL. Normally "".
57              
58             'man_url_prefix',
59             # In turning L into http://whatever/man/1/crontab, what
60             # to put before the "1/crontab".
61             'man_url_postfix',
62             # what to put after the "1/crontab" in the URL. Normally "".
63              
64             'batch_mode', # whether we're in batch mode
65             'batch_mode_current_level',
66             # When in batch mode, how deep the current module is: 1 for "LWP",
67             # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68              
69             'title_prefix', 'title_postfix',
70             # What to put before and after the title in the head.
71             # Should already be &-escaped
72              
73             'html_h_level',
74              
75             'html_header_before_title',
76             'html_header_after_title',
77             'html_footer',
78             'top_anchor',
79              
80             'index', # whether to add an index at the top of each page
81             # (actually it's a table-of-contents, but we'll call it an index,
82             # out of apparently longstanding habit)
83              
84             'html_css', # URL of CSS file to point to
85             'html_javascript', # URL of Javascript file to point to
86              
87             'force_title', # should already be &-escaped
88             'default_title', # should already be &-escaped
89             );
90              
91             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
92             my @_to_accept;
93              
94             our %Tagmap = (
95             'Verbatim' => "\n",
96             '/Verbatim' => "\n",
97             'VerbatimFormatted' => "\n",
98             '/VerbatimFormatted' => "\n",
99             'VerbatimB' => "",
100             '/VerbatimB' => "",
101             'VerbatimI' => "",
102             '/VerbatimI' => "",
103             'VerbatimBI' => "",
104             '/VerbatimBI' => "",
105              
106              
107             'Data' => "\n",
108             '/Data' => "\n",
109              
110             'head1' => "\n

", # And also stick in an

111             'head2' => "\n

", # ''

112             'head3' => "\n

", # ''

113             'head4' => "\n

", # ''

114             'head5' => "\n
", # ''
115             'head6' => "\n
", # ''
116             '/head1' => "\n",
117             '/head2' => "\n",
118             '/head3' => "\n",
119             '/head4' => "\n",
120             '/head5' => "\n",
121             '/head6' => "\n",
122              
123             'X' => "",
125              
126             changes(qw(
127             Para=p
128             B=b I=i
129             over-bullet=ul
130             over-number=ol
131             over-text=dl
132             over-block=blockquote
133             item-bullet=li
134             item-number=li
135             item-text=dt
136             )),
137             changes2(
138             map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
139             qw[
140             sample=samp
141             definition=dfn
142             keyboard=kbd
143             variable=var
144             citation=cite
145             abbreviation=abbr
146             acronym=acronym
147             subscript=sub
148             superscript=sup
149             big=big
150             small=small
151             underline=u
152             strikethrough=s
153             preformat=pre
154             teletype=tt
155             ] # no point in providing a way to get ..., I think
156             ),
157              
158             '/item-bullet' => "$LamePad\n",
159             '/item-number' => "$LamePad\n",
160             '/item-text' => "$LamePad\n",
161             'item-body' => "\n
",
162             '/item-body' => "\n",
163              
164              
165             'B' => "", '/B' => "",
166             'I' => "", '/I' => "",
167             'F' => "", '/F' => "",
168             'C' => "", '/C' => "",
169             'L' => "", # ideally never used!
170             '/L' => "",
171             );
172              
173             sub changes {
174 7 50   7 0 15 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  70         443  
175             ? ( $1, => "\n<$2>", "/$1", => "\n" ) : die "Funky $_"
176             } @_;
177             }
178             sub changes2 {
179 7 50   7 0 16 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
  105         968  
180             ? ( $1, => "<$2>", "/$1", => "" ) : die "Funky $_"
181             } @_;
182             }
183              
184             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
185 0     0 0 0 sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
  0         0  
186             # Just so we can run from the command line. No options.
187             # For that, use perldoc!
188             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
189              
190             sub new {
191 59     59 1 846 my $new = shift->SUPER::new(@_);
192             #$new->nix_X_codes(1);
193 59         239 $new->nbsp_for_S(1);
194 59         209 $new->accept_targets( 'html', 'HTML' );
195 59         194 $new->accept_codes('VerbatimFormatted');
196 59         195 $new->accept_codes(@_to_accept);
197 59         86 DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
198              
199 59         191 $new->perldoc_url_prefix( $Perldoc_URL_Prefix );
200 59         185 $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
201 59         164 $new->man_url_prefix( $Man_URL_Prefix );
202 59         169 $new->man_url_postfix( $Man_URL_Postfix );
203 59         183 $new->title_prefix( $Title_Prefix );
204 59         162 $new->title_postfix( $Title_Postfix );
205              
206 59         251 $new->html_header_before_title(
207             qq[$Doctype_decl] </td> </tr> <tr> <td class="h" > <a name="208">208</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> ); </td> </tr> <tr> <td class="h" > <a name="209">209</a> </td> <td class="c3" > 59 </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td > 187 </td> <td class="s"> $new->html_header_after_title( join "\n" => </td> </tr> <tr> <td class="h" > <a name="210">210</a> </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td >   </td> <td class="s"> "",
211             $Content_decl,
212             "\n",
213             $new->version_tag_comment,
214             "\n",
215             );
216 59         223 $new->html_footer( qq[\n\n\n\n] );
217 59         158 $new->top_anchor( "\n" );
218              
219 59         2342 $new->{'Tagmap'} = {%Tagmap};
220              
221 59         356 return $new;
222             }
223              
224             sub __adjust_html_h_levels {
225 58     58   115 my ($self) = @_;
226 58         87 my $Tagmap = $self->{'Tagmap'};
227              
228 58         181 my $add = $self->html_h_level;
229 58 100       152 return unless defined $add;
230 1 50 50     14 return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
231              
232 1         2 $add -= 1;
233 1         7 for (1 .. 6) {
234 6         70 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e;
  6         19  
235 6         41 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
  6         19  
236             }
237             }
238              
239             sub batch_mode_page_object_init {
240 10     10 0 23 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
241 10         14 DEBUG and print STDERR "Initting $self\n for $module\n",
242             " in $infile\n out $outfile\n depth $depth\n";
243 10         26 $self->batch_mode(1);
244 10         27 $self->batch_mode_current_level($depth);
245 10         19 return $self;
246             }
247              
248             sub run {
249 59     59 0 93 my $self = $_[0];
250 59 100       170 return $self->do_middle if $self->bare_output;
251             return
252 18   100     48 $self->do_beginning && $self->do_middle && $self->do_end;
253             }
254              
255             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
256              
257             sub do_beginning {
258 18     18 0 28 my $self = $_[0];
259              
260 18         21 my $title;
261              
262 18 50       42 if(defined $self->force_title) {
263 0         0 $title = $self->force_title;
264 0         0 DEBUG and print STDERR "Forcing title to be $title\n";
265             } else {
266             # Actually try looking for the title in the document:
267 18         64 $title = $self->get_short_title();
268 18 100       75 unless($self->content_seen) {
269 1         5 DEBUG and print STDERR "No content seen in search for title.\n";
270 1         5 return;
271             }
272 17         39 $self->{'Title'} = $title;
273              
274 17 100 66     83 if(defined $title and $title =~ m/\S/) {
275 14         40 $title = $self->title_prefix . esc($title) . $self->title_postfix;
276             } else {
277 3         11 $title = $self->default_title;
278 3 50       11 $title = '' unless defined $title;
279 3         5 DEBUG and print STDERR "Title defaults to $title\n";
280             }
281             }
282              
283              
284 17   50     52 my $after = $self->html_header_after_title || '';
285 17 100       49 if($self->html_css) {
286 10 50       20 my $link =
287             $self->html_css =~ m/
288             ? $self->html_css # It's a big blob of markup, let's drop it in
289             : sprintf( # It's just a URL, so let's wrap it up
290             qq[\n],
291             $self->html_css,
292             );
293 10         120 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
294             }
295 17         71 $self->_add_top_anchor(\$after);
296              
297 17 100       53 if($self->html_javascript) {
298 10 50       27 my $link =
299             $self->html_javascript =~ m/
300             ? $self->html_javascript # It's a big blob of markup, let's drop it in
301             : sprintf( # It's just a URL, so let's wrap it up
302             qq[\n],
303             $self->html_javascript,
304             );
305 10         110 $after =~ s{()}{$link\n$1}i; # otherwise nevermind
306             }
307              
308 17   50     31 print {$self->{'output_fh'}}
  17         56  
309             $self->html_header_before_title || '',
310             $title, # already escaped
311             $after,
312             ;
313              
314 17         33 DEBUG and print STDERR "Returning from do_beginning...\n";
315 17         92 return 1;
316             }
317              
318             sub _add_top_anchor {
319 17     17   33 my($self, $text_r) = @_;
320 17 100 66     150 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
321 7   50     49 $$text_r .= $self->top_anchor || '';
322             }
323 17         73 return;
324             }
325              
326             sub version_tag_comment {
327 59     59 0 90 my $self = shift;
328             return sprintf
329             "\n",
330             esc(
331             ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
332 59   33     1494 $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)),
333             ), $self->_modnote(),
334             ;
335             }
336              
337             sub _modnote {
338 59   33 59   184 my $class = ref($_[0]) || $_[0];
339 59         1450 return join "\n " => grep m/\S/, split "\n",
340              
341             qq{
342             If you want to change this HTML document, you probably shouldn't do that
343             by changing it directly. Instead, see about changing the calling options
344             to $class, and/or subclassing $class,
345             then reconverting this document from the Pod source.
346             When in doubt, email the author of $class for advice.
347             See 'perldoc $class' for more info.
348             };
349              
350             }
351              
352             sub do_end {
353 17     17 0 34 my $self = $_[0];
354 17   50     36 print {$self->{'output_fh'}} $self->html_footer || '';
  17         86  
355 17         91 return 1;
356             }
357              
358             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
359             # Normally this would just be a call to _do_middle_main_loop -- but we
360             # have to do some elaborate things to emit all the content and then
361             # summarize it and output it /before/ the content that it's a summary of.
362              
363             sub do_middle {
364 58     58 0 85 my $self = $_[0];
365 58 100       163 return $self->_do_middle_main_loop unless $self->index;
366              
367 10 50       34 if( $self->output_string ) {
368             # An efficiency hack
369 0         0 my $out = $self->output_string; #it's a reference to it
370 0         0 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
371 0         0 $$out .= $sneakytag;
372 0         0 $self->_do_middle_main_loop;
373 0         0 $sneakytag = quotemeta($sneakytag);
374 0         0 my $index = $self->index_as_html();
375 0 0       0 if( $$out =~ s/$sneakytag/$index/s ) {
376             # Expected case
377 0         0 DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
378             } else {
379 0         0 DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
380             # I don't think this should ever happen.
381             }
382 0         0 return 1;
383             }
384              
385 10 50       47 unless( $self->output_fh ) {
386 0         0 require Carp;
387 0         0 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that.");
388             }
389              
390             # If we get here, we're outputting to a FH. So we need to do some magic.
391             # Namely, divert all content to a string, which we output after the index.
392 10         21 my $fh = $self->output_fh;
393 10         26 my $content = '';
394             {
395             # Our horrible bait and switch:
396 10         13 $self->output_string( \$content );
  10         27  
397 10         33 $self->_do_middle_main_loop;
398 10         37 $self->abandon_output_string();
399 10         21 $self->output_fh($fh);
400             }
401 10         28 print $fh $self->index_as_html();
402 10         41 print $fh $content;
403              
404 10         51 return 1;
405             }
406              
407             ###########################################################################
408              
409             sub index_as_html {
410 10     10 0 14 my $self = $_[0];
411             # This is meant to be called AFTER the input document has been parsed!
412              
413 10   50     34 my $points = $self->{'PSHTML_index_points'} || [];
414              
415 10 100       39 @$points > 1 or return qq[
\n];
416             # There's no point in having a 0-item or 1-item index, I dare say.
417              
418 8         21 my(@out) = qq{\n
};
419 8         11 my $level = 0;
420              
421 8         13 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
422 8         25 foreach my $p (@$points, ['head0', '(end)']) {
423 26         58 ($tagname, $text) = @$p;
424 26         54 $anchorname = $self->section_escape($text);
425 26 50       103 if( $tagname =~ m{^head(\d+)$} ) {
426 26         62 $target_level = 0 + $1;
427             } else { # must be some kinda list item
428 0 0       0 if($previous_tagname =~ m{^head\d+$} ) {
429 0         0 $target_level = $level + 1;
430             } else {
431 0         0 $target_level = $level; # no change needed
432             }
433             }
434              
435             # Get to target_level by opening or closing ULs
436 26         56 while($level > $target_level)
437 9         18 { --$level; push @out, (" " x $level) . ""; }
  9         35  
438 26         52 while($level < $target_level)
439 9         14 { ++$level; push @out, (" " x ($level-1))
  9         109  
440             . "
    "; }
441              
442 26         34 $previous_tagname = $tagname;
443 26 100       53 next unless $level;
444              
445 18         35 $indent = ' ' x $level;
446 18         36 push @out, sprintf
447             "%s
  • %s",
  • 448             $indent, $level, esc($anchorname), esc($text)
    449             ;
    450             }
    451 8         16 push @out, "\n";
    452 8         46 return join "\n", @out;
    453             }
    454              
    455             ###########################################################################
    456              
    457             sub _do_middle_main_loop {
    458 58     58   90 my $self = $_[0];
    459 58         89 my $fh = $self->{'output_fh'};
    460 58         89 my $tagmap = $self->{'Tagmap'};
    461              
    462 58         150 $self->__adjust_html_h_levels;
    463              
    464 58         135 my($token, $type, $tagname, $linkto, $linktype);
    465 58         0 my @stack;
    466 58         79 my $dont_wrap = 0;
    467              
    468 58         150 while($token = $self->get_token) {
    469              
    470             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    471 515 100       1224 if( ($type = $token->type) eq 'start' ) {
        100          
        50          
    472 200 100 100     422 if(($tagname = $token->tagname) eq 'L') {
        100          
        100          
    473 22   50     70 $linktype = $token->attr('type') || 'insane';
    474              
    475 22         56 $linkto = $self->do_link($token);
    476              
    477 22 50 33     87 if(defined $linkto and length $linkto) {
    478 22         56 esc($linkto);
    479             # (Yes, SGML-escaping applies on top of %-escaping!
    480             # But it's rarely noticeable in practice.)
    481 22         106 print $fh qq{};
    482             } else {
    483 0         0 print $fh ""; # Yes, an 'a' element with no attributes!
    484             }
    485              
    486             } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
    487 37   50     173 print $fh $tagmap->{$tagname} || next;
    488              
    489 37         78 my @to_unget;
    490 37         54 while(1) {
    491 85         198 push @to_unget, $self->get_token;
    492 85 100 100     222 last if $to_unget[-1]->is_end
    493             and $to_unget[-1]->tagname eq $tagname;
    494              
    495             # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens)
    496             }
    497              
    498 37         122 my $name = $self->linearize_tokens(@to_unget);
    499 37 50       131 $name = $self->do_section($name, $token) if defined $name;
    500              
    501 37         118 print $fh "
    502 37 100       190 if ($tagname =~ m/^head\d$/s) {
    503 33 100       106 print $fh "class='u'", $self->index
    504             ? " href='#___top' title='click to go to top of document'\n"
    505             : "\n";
    506             }
    507              
    508 37 50       85 if(defined $name) {
    509 37         98 my $esc = esc( $self->section_name_tidy( $name ) );
    510 37         147 print $fh qq[name="$esc"];
    511 37         53 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    512             " tokens as \"$name\".\n";
    513 31         115 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
    514 37 100       101 if $ToIndex{ $tagname };
    515             # Obviously, this discards all formatting codes (saving
    516             # just their content), but ahwell.
    517              
    518             } else { # ludicrously long, so nevermind
    519 0         0 DEBUG and print STDERR "Linearized ", scalar(@to_unget),
    520             " tokens, but it was too long, so nevermind.\n";
    521             }
    522 37         98 print $fh "\n>";
    523 37         106 $self->unget_token(@to_unget);
    524              
    525             } elsif ($tagname eq 'Data') {
    526 4         10 my $next = $self->get_token;
    527 4 50       10 next unless defined $next;
    528 4 50       11 unless( $next->type eq 'text' ) {
    529 0         0 $self->unget_token($next);
    530 0         0 next;
    531             }
    532 4         7 DEBUG and print STDERR " raw text ", $next->text, "\n";
    533             # The parser sometimes preserves newlines and sometimes doesn't!
    534 4         11 (my $text = $next->text) =~ s/\n\z//;
    535 4         17 print $fh $text, "\n";
    536 4         13 next;
    537              
    538             } else {
    539 137 100 33     503 if( $tagname =~ m/^over-/s ) {
        50 33        
    540 3         15 push @stack, '';
    541             } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
    542 0         0 print $fh $stack[-1];
    543 0         0 $stack[-1] = '';
    544             }
    545 137   100     574 print $fh $tagmap->{$tagname} || next;
    546 78 100 66     452 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
          100        
    547             or $tagname eq 'X';
    548             }
    549              
    550             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    551             } elsif( $type eq 'end' ) {
    552 200 100 66     424 if( ($tagname = $token->tagname) =~ m/^over-/s ) {
        100          
    553 3 50       81 if( my $end = pop @stack ) {
    554 3         13 print $fh $end;
    555             }
    556             } elsif( $tagname =~ m/^item-/s and @stack) {
    557 4         37 $stack[-1] = $tagmap->{"/$tagname"};
    558 4 50 33     21 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
    559 4         18 $self->unget_token($next);
    560 4 100       14 if( $next->type eq 'start' ) {
    561 3         16 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
    562 3         10 $stack[-1] = $tagmap->{"/item-body"};
    563             }
    564             }
    565 4         10 next;
    566             }
    567 196   100     724 print $fh $tagmap->{"/$tagname"} || next;
    568 137 100 66     600 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
    569              
    570             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    571             } elsif( $type eq 'text' ) {
    572 115         283 esc($type = $token->text); # reuse $type, why not
    573 115 100       366 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
    574 115         289 print $fh $type;
    575             }
    576              
    577             }
    578 58         242 return 1;
    579             }
    580              
    581             ###########################################################################
    582             #
    583              
    584             sub do_section {
    585 36     36 0 93 my($self, $name, $token) = @_;
    586 36         64 return $name;
    587             }
    588              
    589             sub do_link {
    590 22     22 0 42 my($self, $token) = @_;
    591 22         43 my $type = $token->attr('type');
    592 22 50       71 if(!defined $type) {
        100          
        100          
        50          
    593 0         0 $self->whine("Typeless L!?", $token->attr('start_line'));
    594 10         25 } elsif( $type eq 'pod') { return $self->do_pod_link($token);
    595 9         23 } elsif( $type eq 'url') { return $self->do_url_link($token);
    596 3         12 } elsif( $type eq 'man') { return $self->do_man_link($token);
    597             } else {
    598 0         0 $self->whine("L of unknown type $type!?", $token->attr('start_line'));
    599             }
    600 0         0 return 'FNORG'; # should never get called
    601             }
    602              
    603             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    604              
    605 9     9 0 18 sub do_url_link { return $_[1]->attr('to') }
    606              
    607             sub do_man_link {
    608 3     3 0 5 my ($self, $link) = @_;
    609 3         7 my $to = $link->attr('to');
    610 3         8 my $frag = $link->attr('section');
    611              
    612 3 50 33     15 return undef unless defined $to and length $to; # should never happen
    613              
    614 3 100 66     36 $frag = $self->section_escape($frag)
    615             if defined $frag and length($frag .= ''); # (stringify)
    616              
    617 3         5 DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
    618              
    619 3         12 return $self->resolve_man_page_link($to, $frag);
    620             }
    621              
    622              
    623             sub do_pod_link {
    624             # And now things get really messy...
    625 10     10 0 16 my($self, $link) = @_;
    626 10         20 my $to = $link->attr('to');
    627 10         20 my $section = $link->attr('section');
    628             return undef unless( # should never happen
    629 10 50 66     45 (defined $to and length $to) or
          33        
          66        
    630             (defined $section and length $section)
    631             );
    632              
    633 10 100 66     35 $section = $self->section_escape($section)
    634             if defined $section and length($section .= ''); # (stringify)
    635              
    636 10         17 DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
    637             $to || "(nil)", $section || "(nil)";
    638              
    639             {
    640             # An early hack:
    641 10         12 my $complete_url = $self->resolve_pod_link_by_table($to, $section);
      10         23  
    642 10 50       33 if( $complete_url ) {
    643 0         0 DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
    644             $complete_url, "\n (Returning that.)\n";
    645 0         0 return $complete_url;
    646             } else {
    647 10         19 DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
    648             " didn't return anything interesting.\n";
    649             }
    650             }
    651              
    652 10 100 66     32 if(defined $to and length $to) {
    653             # Give this routine first hack again
    654 7         17 my $there = $self->resolve_pod_link_by_table($to);
    655 7 50 33     23 if(defined $there and length $there) {
    656 0         0 DEBUG > 1
    657             and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
    658             } else {
    659 7         22 $there =
    660             $self->resolve_pod_page_link($to, $section);
    661             # (I pass it the section value, but I don't see a
    662             # particular reason it'd use it.)
    663 7         11 DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
    664 7 50 33     24 unless( defined $there and length $there ) {
    665 0         0 DEBUG and print STDERR "Can't resolve $to\n";
    666 0         0 return undef;
    667             }
    668             # resolve_pod_page_link returning undef is how it
    669             # can signal that it gives up on making a link
    670             }
    671 7         16 $to = $there;
    672             }
    673              
    674             #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
    675              
    676 10 100 66     33 my $out = (defined $to and length $to) ? $to : '';
    677 10 100 66     36 $out .= "#" . $section if defined $section and length $section;
    678              
    679 10 50       21 unless(length $out) { # sanity check
    680 0         0 DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
    681             $to || "(nil)", $section || "(nil)";
    682 0         0 return undef;
    683             }
    684              
    685 10         11 DEBUG and print STDERR "Resolved to $out\n";
    686 10         26 return $out;
    687             }
    688              
    689              
    690             # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    691              
    692             sub section_escape {
    693 32     32 0 59 my($self, $section) = @_;
    694 32         64 return $self->section_url_escape(
    695             $self->section_name_tidy($section)
    696             );
    697             }
    698              
    699             sub section_name_tidy {
    700 69     69 0 128 my($self, $section) = @_;
    701 69         175 $section =~ s/^\s+//;
    702 69         162 $section =~ s/\s+$//;
    703 69         122 $section =~ tr/ /_/;
    704 69 50       289 if ($] ge 5.006) {
    705 69         143 $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
    706             } elsif ('A' eq chr(65)) { # But not on early EBCDIC
    707 0         0 $section =~ tr/\x00-\x1F\x80-\x9F//d;
    708             }
    709 69         142 $section = $self->unicode_escape_url($section);
    710 69 50       145 $section = '_' unless length $section;
    711 69         166 return $section;
    712             }
    713              
    714 32     32 0 78 sub section_url_escape { shift->general_url_escape(@_) }
    715 7     7 0 15 sub pagepath_url_escape { shift->general_url_escape(@_) }
    716 3     3 0 8 sub manpage_url_escape { shift->general_url_escape(@_) }
    717              
    718             sub general_url_escape {
    719 42     42 0 79 my($self, $string) = @_;
    720              
    721 42         73 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
      0         0  
    722             # express Unicode things as urlencode(utf(orig)).
    723              
    724             # A pretty conservative escaping, behoovey even for query components
    725             # of a URL (see RFC 2396)
    726              
    727 42 50       136 if ($] ge 5.007_003) {
    728 42         80 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
      8         43  
    729             } else { # Is broken for non-ASCII platforms on early perls
    730 0         0 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
      0         0  
    731             }
    732             # Yes, stipulate the list without a range, so that this can work right on
    733             # all charsets that this module happens to run under.
    734              
    735 42         95 return $string;
    736             }
    737              
    738             #--------------------------------------------------------------------------
    739             #
    740             # Oh look, a yawning portal to Hell! Let's play touch football right by it!
    741             #
    742              
    743             sub resolve_pod_page_link {
    744             # resolve_pod_page_link must return a properly escaped URL
    745 7     7 0 11 my $self = shift;
    746 7 50       19 return $self->batch_mode()
    747             ? $self->resolve_pod_page_link_batch_mode(@_)
    748             : $self->resolve_pod_page_link_singleton_mode(@_)
    749             ;
    750             }
    751              
    752             sub resolve_pod_page_link_singleton_mode {
    753 7     7 0 23 my($self, $it) = @_;
    754 7 50 33     25 return undef unless defined $it and length $it;
    755 7         37 my $url = $self->pagepath_url_escape($it);
    756              
    757 7         15 $url =~ s{::$}{}s; # probably never comes up anyway
    758 7 50       22 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
    759              
    760 7 50       17 return undef unless length $url;
    761 7         18 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
    762             }
    763              
    764             sub resolve_pod_page_link_batch_mode {
    765 0     0 0 0 my($self, $to) = @_;
    766 0         0 DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
    767 0         0 my @path = grep length($_), split m/::/s, $to, -1;
    768 0 0       0 unless( @path ) { # sanity
    769 0         0 DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n";
    770 0         0 return undef;
    771             }
    772 0         0 $self->batch_mode_rectify_path(\@path);
    773 0         0 my $out = join('/', map $self->pagepath_url_escape($_), @path)
    774             . $HTML_EXTENSION;
    775 0         0 DEBUG > 1 and print STDERR " => $out\n";
    776 0         0 return $out;
    777             }
    778              
    779             sub batch_mode_rectify_path {
    780 0     0 0 0 my($self, $pathbits) = @_;
    781 0         0 my $level = $self->batch_mode_current_level;
    782 0         0 $level--; # how many levels up to go to get to the root
    783 0 0       0 if($level < 1) {
    784 0         0 unshift @$pathbits, '.'; # just to be pretty
    785             } else {
    786 0         0 unshift @$pathbits, ('..') x $level;
    787             }
    788 0         0 return;
    789             }
    790              
    791             sub resolve_man_page_link {
    792 3     3 0 9 my ($self, $to, $frag) = @_;
    793 3         7 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
    794              
    795 3 50 33     17 return undef unless defined $page and length $page;
    796 3   50     21 $section ||= 1;
    797              
    798 3         11 return $self->man_url_prefix . "$section/"
    799             . $self->manpage_url_escape($page)
    800             . $self->man_url_postfix;
    801             }
    802              
    803             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    804              
    805             sub resolve_pod_link_by_table {
    806             # A crazy hack to allow specifying custom L => URL mappings
    807              
    808 17 50   17 0 60 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut
    809              
    810 0         0 my($self, $to, $section) = @_;
    811              
    812             # TODO: add a method that actually populates podhtml_LOT from a file?
    813              
    814 0 0       0 if(defined $section) {
    815 0 0 0     0 $to = '' unless defined $to and length $to;
    816 0         0 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
    817             } else {
    818 0         0 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef!
    819             }
    820 0         0 return;
    821             }
    822              
    823             ###########################################################################
    824              
    825             sub linearize_tokens { # self, tokens
    826 37     37 0 58 my $self = shift;
    827 37         139 my $out = '';
    828              
    829 37         58 my $t;
    830 37         156 while($t = shift @_) {
    831 79 50 33     419 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
        100 66        
        100          
    832 0         0 $out .= $t; # a string, or some insane thing
    833             } elsif($t->is_text) {
    834 39         102 $out .= $t->text;
    835             } elsif($t->is_start and $t->tag eq 'X') {
    836             # Ignore until the end of this X<...> sequence:
    837 3         7 my $x_open = 1;
    838 3         6 while($x_open) {
    839 6 100       17 next if( ($t = shift @_)->is_text );
    840 3 50 33     9 if( $t->is_start and $t->tag eq 'X') { ++$x_open }
      0 50 33     0  
    841 3         11 elsif($t->is_end and $t->tag eq 'X') { --$x_open }
    842             }
    843             }
    844             }
    845 37 50       95 return undef if length $out > $Linearization_Limit;
    846 37         90 return $out;
    847             }
    848              
    849             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    850              
    851             sub unicode_escape_url {
    852 69     69 0 136 my($self, $string) = @_;
    853 69         120 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
      0         0  
    854             # Turn char 1234 into "(1234)"
    855 69         134 return $string;
    856             }
    857              
    858             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    859             sub esc { # a function.
    860 462 100   462 0 1013 if(defined wantarray) {
    861 325 100       512 if(wantarray) {
    862 274         702 @_ = splice @_; # break aliasing
    863             } else {
    864 51         74 my $x = shift;
    865 51 50       192 if ($] ge 5.007_003) {
    866 51         99 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
      0         0  
    867             } else { # Is broken for non-ASCII platforms on early perls
    868 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
      0         0  
    869             }
    870 51         178 return $x;
    871             }
    872             }
    873 411         683 foreach my $x (@_) {
    874             # Escape things very cautiously:
    875 1094 50       1784 if (defined $x) {
    876 1094 50       2853 if ($] ge 5.007_003) {
    877 1094         2138 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
      52         190  
    878             } else { # Is broken for non-ASCII platforms on early perls
    879 0         0 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
      0         0  
    880             }
    881             }
    882             # Leave out "- so that "--" won't make it thru in X-generated comments
    883             # with text in them.
    884              
    885             # Yes, stipulate the list without a range, so that this can work right on
    886             # all charsets that this module happens to run under.
    887             }
    888 411         1287 return @_;
    889             }
    890              
    891             #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    892              
    893             1;
    894             __END__