File Coverage

blib/lib/Pod/Markdown.pm
Criterion Covered Total %
statement 392 400 98.0
branch 120 130 92.3
condition 27 37 72.9
subroutine 99 101 98.0
pod 11 59 18.6
total 649 727 89.2


line stmt bran cond sub pod time code
1             # vim: set ts=2 sts=2 sw=2 expandtab smarttab:
2             #
3             # This file is part of Pod-Markdown
4             #
5             # This software is copyright (c) 2011 by Randy Stauner.
6             #
7             # This is free software; you can redistribute it and/or modify it under
8             # the same terms as the Perl 5 programming language system itself.
9             #
10 16     16   2040835 use 5.008;
  16         181  
11 16     16   81 use strict;
  16         39  
  16         477  
12 16     16   95 use warnings;
  16         29  
  16         1440  
13              
14             package Pod::Markdown;
15             # git description: v3.101-3-g70682ef
16              
17             our $AUTHORITY = 'cpan:RWSTAUNER';
18             # ABSTRACT: Convert POD to Markdown
19             $Pod::Markdown::VERSION = '3.200';
20 16     16   9092 use Pod::Simple 3.27 (); # detected_encoding and keep_encoding bug fix
  16         499098  
  16         478  
21 16     16   6783 use parent qw(Pod::Simple::Methody);
  16         4431  
  16         108  
22 16     16   19354 use Encode ();
  16         165828  
  16         324  
23 16     16   6122 use URI::Escape ();
  16         20910  
  16         2486  
24              
25             our %URL_PREFIXES = (
26             sco => 'http://search.cpan.org/perldoc?',
27             metacpan => 'https://metacpan.org/pod/',
28             man => 'http://man.he.net/man',
29             );
30             $URL_PREFIXES{perldoc} = $URL_PREFIXES{metacpan};
31              
32             our $LOCAL_MODULE_RE = qr/^(Local::|\w*?_\w*)/;
33              
34             ## no critic
35             #{
36             our $HAS_HTML_ENTITIES;
37              
38             # Stolen from Pod::Simple::XHTML 3.28. {{{
39              
40             BEGIN {
41 16     16   1066 $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
42             }
43              
44             my %entities = (
45             q{>} => 'gt',
46             q{<} => 'lt',
47             q{'} => '#39',
48             q{"} => 'quot',
49             q{&} => 'amp',
50             );
51              
52             sub encode_entities {
53 234     234 0 400 my $self = shift;
54 234         314 my $ents = $self->html_encode_chars;
55 234 100       534 return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
56 49 100       121 if (defined $ents) {
57 48         94 $ents =~ s,(?<!\\)([]/]),\\$1,g;
58 48         67 $ents =~ s,(?<!\\)\\\z,\\\\,;
59             } else {
60 1         6 $ents = join '', keys %entities;
61             }
62 49         61 my $str = $_[0];
63 49   66     239 $str =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', ord $1) . ';'/ge;
  23         126  
64 49         145 return $str;
65             }
66              
67             # }}}
68              
69             # Add a few very common ones for consistency and readability
70             # (in case HTML::Entities isn't available).
71             %entities = (
72             # Pod::Markdown has always required 5.8 so unicode_to_native will be available.
73             chr(utf8::unicode_to_native(0xA0)) => 'nbsp',
74             chr(utf8::unicode_to_native(0xA9)) => 'copy',
75             %entities
76             );
77              
78             sub __entity_encode_ord_he {
79 11     11   235 my $chr = chr $_[0];
80             # Skip the encode_entities() logic and go straight for the substitution
81             # since we already have the char we know we want replaced.
82             # Both the hash and the function are documented as exportable (so should be reliable).
83 11   33     48 return $HTML::Entities::char2entity{ $chr } || HTML::Entities::num_entity( $chr );
84             }
85             sub __entity_encode_ord_basic {
86 7   33 7   225 return '&' . ($entities{chr $_[0]} || sprintf '#x%X', $_[0]) . ';';
87             }
88              
89             # From HTML::Entities 3.69
90             my $DEFAULT_ENTITY_CHARS = '^\n\r\t !\#\$%\(-;=?-~';
91              
92             #}
93             ## use critic
94              
95             # Use hash for simple "exists" check in `new` (much more accurate than `->can`).
96             my %attributes = map { ($_ => 1) }
97             qw(
98             html_encode_chars
99             match_encoding
100             output_encoding
101             local_module_re
102             local_module_url_prefix
103             man_url_prefix
104             perldoc_url_prefix
105             perldoc_fragment_format
106             markdown_fragment_format
107             include_meta_tags
108             );
109              
110              
111             sub new {
112 303     303 1 192300 my $class = shift;
113 303         793 my %args = @_;
114              
115 303         902 my $self = $class->SUPER::new();
116 303         5028 $self->preserve_whitespace(1);
117 303         2128 $self->nbsp_for_S(1);
118 303         1667 $self->accept_targets(qw( markdown html ));
119              
120             # Default to the global, but allow it to be overwritten in args.
121 303         5113 $self->local_module_re($LOCAL_MODULE_RE);
122              
123 303         1482 for my $type ( qw( perldoc man ) ){
124 606         916 my $attr = $type . '_url_prefix';
125             # Initialize to the alias.
126 606         1281 $self->$attr($type);
127             }
128              
129 303         867 while( my ($attr, $val) = each %args ){
130             # NOTE: Checking exists on a private var means we don't allow Pod::Simple
131             # attributes to be set this way. It's not very consistent, but I think
132             # I'm ok with that for now since there probably aren't many Pod::Simple attributes
133             # being changed besides `output_*` which feel like API rather than attributes.
134             # We'll see.
135             # This is currently backward-compatible as we previously just put the attribute
136             # into the private stash so anything unknown was silently ignored.
137             # We could open this up to `$self->can($attr)` in the future if that seems better
138             # but it tricked me when I was testing a misspelled attribute name
139             # which also happened to be a Pod::Simple method.
140              
141 906 100       3258 exists $attributes{ $attr } or
142             # Provide a more descriptive message than "Can't locate object method".
143             warn("Unknown argument to ${class}->new(): '$attr'"), next;
144              
145             # Call setter.
146 904         1676 $self->$attr($val);
147             }
148              
149             # TODO: call from the setters.
150 303         1503 $self->_prepare_fragment_formats;
151              
152 303         762 return $self;
153             }
154              
155             for my $type ( qw( local_module perldoc man ) ){
156             my $attr = $type . '_url_prefix';
157 16     16   118 no strict 'refs'; ## no critic
  16         31  
  16         106564  
158             *$attr = sub {
159 1000     1000   12734 my $self = shift;
160 1000 100       1664 if (@_) {
161 847   100     3072 $self->{$attr} = $URL_PREFIXES{ $_[0] } || $_[0];
162             }
163             else {
164 153         364 return $self->{$attr};
165             }
166             }
167             }
168              
169             ## Attribute accessors ##
170              
171              
172             sub html_encode_chars {
173 305     305 1 344 my $self = shift;
174 305         403 my $stash = $self->_private;
175              
176             # Setter.
177 305 100       495 if( @_ ){
178             # If false ('', 0, undef), disable.
179 71 50       135 if( !$_[0] ){
180 0         0 delete $stash->{html_encode_chars};
181 0         0 $stash->{encode_amp} = 1;
182 0         0 $stash->{encode_lt} = 1;
183             }
184             else {
185             # Special case boolean '1' to mean "all".
186             # If we have HTML::Entities, undef will use the default.
187             # Without it, we need to specify so that we use the same list (for consistency).
188 71 100       157 $stash->{html_encode_chars} = $_[0] eq '1' ? ($HAS_HTML_ENTITIES ? undef : $DEFAULT_ENTITY_CHARS) : $_[0];
    100          
189              
190             # If [char] doesn't get encoded, we need to do it ourselves.
191 71         116 $stash->{encode_amp} = ($self->encode_entities('&') eq '&');
192 71         2172 $stash->{encode_lt} = ($self->encode_entities('<') eq '<');
193             }
194 71         1538 return;
195             }
196              
197             # Getter.
198 234         320 return $stash->{html_encode_chars};
199             }
200              
201              
202             # I prefer ro-accessors (immutability!) but it can be confusing
203             # to not support the same API as other Pod::Simple classes.
204              
205             # NOTE: Pod::Simple::_accessorize is not a documented public API.
206             # Skip any that have already been defined.
207             __PACKAGE__->_accessorize(grep { !__PACKAGE__->can($_) } keys %attributes);
208              
209             sub _prepare_fragment_formats {
210 303     303   424 my ($self) = @_;
211              
212 303         739 foreach my $attr ( keys %attributes ){
213 3030 100       6998 next unless $attr =~ /^(\w+)_fragment_format/;
214 606         1100 my $type = $1;
215 606         1211 my $format = $self->$attr;
216              
217             # If one was provided.
218 606 100       3039 if( $format ){
219             # If the attribute is a coderef just use it.
220 510 100       1085 next if ref($format) eq 'CODE';
221             }
222             # Else determine a default.
223             else {
224 96 100       205 if( $type eq 'perldoc' ){
225             # Choose a default that matches the destination url.
226 47         72 my $target = $self->perldoc_url_prefix;
227 47         83 foreach my $alias ( qw( metacpan sco ) ){
228 94 100       212 if( $target eq $URL_PREFIXES{ $alias } ){
229 38         52 $format = $alias;
230             }
231             }
232             # This seems like a reasonable fallback.
233 47   100     95 $format ||= 'pod_simple_xhtml';
234             }
235             else {
236 49         66 $format = $type;
237             }
238             }
239              
240             # The short name should become a method name with the prefix prepended.
241 142         163 my $prefix = 'format_fragment_';
242 142         406 $format =~ s/^$prefix//;
243 142 50       523 die "Unknown fragment format '$format'"
244             unless $self->can($prefix . $format);
245              
246             # Save it.
247 142         284 $self->$attr($format);
248             }
249              
250 303         575 return;
251             }
252              
253             ## Backward compatible API ##
254              
255             # For backward compatibility (previously based on Pod::Parser):
256             # While Pod::Simple provides a parse_from_file() method
257             # it's primarily for Pod::Parser compatibility.
258             # When called without an output handle it will print to STDOUT
259             # but the old Pod::Markdown never printed to a handle
260             # so we don't want to start now.
261             sub parse_from_file {
262 10     10 1 17 my ($self, $file) = @_;
263              
264             # TODO: Check that all dependent cpan modules use the Pod::Simple API
265             # then add a deprecation warning here to avoid confusion.
266              
267 10         40 $self->output_string(\($self->{_as_markdown_}));
268 10         2026 $self->parse_file($file);
269             }
270              
271             # Likewise, though Pod::Simple doesn't define this method at all.
272 9     9 0 356 sub parse_from_filehandle { shift->parse_from_file(@_) }
273              
274              
275             ## Document state ##
276              
277             sub _private {
278 9786     9786   20423 my ($self) = @_;
279             $self->{_Pod_Markdown_} ||= {
280 9786   100     26551 indent => 0,
281             stacks => [],
282             states => [{}],
283             link => [],
284             encode_amp => 1,
285             encode_lt => 1,
286             };
287             }
288              
289             sub _increase_indent {
290 67 50   67   95 ++$_[0]->_private->{indent} >= 1
291             or die 'Invalid state: indent < 0';
292             }
293             sub _decrease_indent {
294 67 50   67   100 --$_[0]->_private->{indent} >= 0
295             or die 'Invalid state: indent < 0';
296             }
297              
298             sub _new_stack {
299 1005     1005   1170 push @{ $_[0]->_private->{stacks} }, [];
  1005         1516  
300 1005         1244 push @{ $_[0]->_private->{states} }, {};
  1005         1405  
301             }
302              
303             sub _last_string {
304 18     18   26 $_[0]->_private->{stacks}->[-1][-1];
305             }
306              
307             sub _pop_stack_text {
308 715     715   739 $_[0]->_private->{last_state} = pop @{ $_[0]->_private->{states} };
  715         1155  
309 715         917 join '', @{ pop @{ $_[0]->_private->{stacks} } };
  715         730  
  715         915  
310             }
311              
312             sub _stack_state {
313 431     431   575 $_[0]->_private->{states}->[-1];
314             }
315              
316             sub _save {
317 1343     1343   2136 my ($self, $text) = @_;
318 1343         1357 push @{ $self->_private->{stacks}->[-1] }, $text;
  1343         2075  
319             # return $text; # DEBUG
320             }
321              
322             sub _save_line {
323 494     494   786 my ($self, $text) = @_;
324              
325 494         859 $text = $self->_process_escapes($text);
326              
327 494         1266 $self->_save($text . $/);
328             }
329              
330             # For paragraphs, etc.
331             sub _save_block {
332 429     429   641 my ($self, $text) = @_;
333              
334 429         701 $self->_stack_state->{blocks}++;
335              
336 429         794 $self->_save_line($self->_indent($text) . $/);
337             }
338              
339             ## Formatting ##
340              
341             sub _chomp_all {
342 332     332   499 my ($self, $text) = @_;
343 332         1032 1 while chomp $text;
344 332         806 return $text;
345             }
346              
347             sub _indent {
348 472     472   644 my ($self, $text) = @_;
349 472         646 my $level = $self->_private->{indent};
350              
351 472 100       782 if( $level ){
352 34         69 my $indent = ' ' x ($level * 4);
353              
354             # Capture text on the line so that we don't indent blank lines (/^\x20{4}$/).
355 34         306 $text =~ s/^(.+)/$indent$1/mg;
356             }
357              
358 472         1384 return $text;
359             }
360              
361             # as_markdown() exists solely for backward compatibility
362             # and requires having called parse_from_file() to be useful.
363              
364              
365             sub as_markdown {
366 10     10 0 279 my ($parser, %args) = @_;
367 10         11 my @header;
368             # Don't add meta tags again if we've already done it.
369 10 100 100     39 if( $args{with_meta} && !$parser->include_meta_tags ){
370 3         22 @header = $parser->_build_markdown_head;
371             }
372 10         47 return join("\n" x 2, @header, $parser->{_as_markdown_});
373             }
374              
375             sub _build_markdown_head {
376 9     9   12 my $parser = shift;
377 9         14 my $data = $parser->_private;
378             return join "\n",
379 12         58 map { qq![[meta \l$_="$data->{$_}"]]! }
380 9         17 grep { defined $data->{$_} }
  18         37  
381             qw( Title Author );
382             }
383              
384             ## Escaping ##
385              
386             # http://daringfireball.net/projects/markdown/syntax#backslash
387             # Markdown provides backslash escapes for the following characters:
388             #
389             # \ backslash
390             # ` backtick
391             # * asterisk
392             # _ underscore
393             # {} curly braces
394             # [] square brackets
395             # () parentheses
396             # # hash mark
397             # + plus sign
398             # - minus sign (hyphen)
399             # . dot
400             # ! exclamation mark
401              
402             # However some of those only need to be escaped in certain places:
403             # * Backslashes *do* need to be escaped or they may be swallowed by markdown.
404             # * Word-surrounding characters (/[`*_]/) *do* need to be escaped mid-word
405             # because the markdown spec explicitly allows mid-word em*pha*sis.
406             # * I don't actually see anything that curly braces are used for.
407             # * Escaping square brackets is enough to avoid accidentally
408             # creating links and images (so we don't need to escape plain parentheses
409             # or exclamation points as that would generate a lot of unnecesary noise).
410             # Parentheses will be escaped in urls (&end_L) to avoid premature termination.
411             # * We don't need a backslash for every hash mark or every hyphen found mid-word,
412             # just the ones that start a line (likewise for plus and dot).
413             # (Those will all be handled by _escape_paragraph_markdown).
414              
415              
416             # Backslash escape markdown characters to avoid having them interpreted.
417             sub _escape_inline_markdown {
418 460     460   612 local $_ = $_[1];
419              
420             # s/([\\`*_{}\[\]()#+-.!])/\\$1/g; # See comments above.
421 460         946 s/([\\`*_\[\]])/\\$1/g;
422              
423 460         748 return $_;
424             }
425              
426             # Escape markdown characters that would be interpreted
427             # at the start of a line.
428             sub _escape_paragraph_markdown {
429 342     342   506 local $_ = $_[1];
430              
431             # Escape headings, horizontal rules, (unordered) lists, and blockquotes.
432 342         686 s/^([-+#>])/\\$1/mg;
433              
434             # Markdown doesn't support backslash escapes for equal signs
435             # even though they can be used to underline a header.
436             # So use html to escape them to avoid having them interpreted.
437 342         494 s/^([=])/sprintf '&#x%x;', ord($1)/mge;
  1         6  
438              
439             # Escape the dots that would wrongfully create numbered lists.
440 342         460 s/^( (?:>\s+)? \d+ ) (\.\x20)/$1\\$2/xgm;
441              
442 342         493 return $_;
443             }
444              
445              
446             # Additionally Markdown allows inline html so we need to escape things that look like it.
447             # While _some_ Markdown processors handle backslash-escaped html,
448             # [Daring Fireball](http://daringfireball.net/projects/markdown/syntax) states distinctly:
449             # > In HTML, there are two characters that demand special treatment: < and &...
450             # > If you want to use them as literal characters, you must escape them as entities, e.g. &lt;, and &amp;.
451              
452             # It goes on to say:
453             # > Markdown allows you to use these characters naturally,
454             # > taking care of all the necessary escaping for you.
455             # > If you use an ampersand as part of an HTML entity,
456             # > it remains unchanged; otherwise it will be translated into &amp;.
457             # > Similarly, because Markdown supports inline HTML,
458             # > if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such.
459              
460             # In order to only encode the occurrences that require it (something that
461             # could be interpreted as an entity) we escape them all so that we can do the
462             # suffix test later after the string is complete (since we don't know what
463             # strings might come after this one).
464              
465             my %_escape =
466             map {
467             my ($k, $v) = split /:/;
468             # Put the "code" marker before the char instead of after so that it doesn't
469             # get confused as the $2 (which is what requires us to entity-encode it).
470             # ( "XsX", "XcsX", "X(c?)sX" )
471             my ($s, $code, $re) = map { "\0$_$v\0" } '', map { ($_, '('.$_.'?)') } 'c';
472              
473             (
474             $k => $s,
475             $k.'_code' => $code,
476             $k.'_re' => qr/$re/,
477             )
478             }
479             qw( amp:& lt:< );
480              
481             # Make the values of this private var available to the tests.
482 1     1   100 sub __escape_sequences { %_escape }
483              
484              
485             # HTML-entity encode any characters configured by the user.
486             # If that doesn't include [&<] then we escape those chars so we can decide
487             # later if we will entity-encode them or put them back verbatim.
488             sub _encode_or_escape_entities {
489 460     460   539 my $self = $_[0];
490 460         664 my $stash = $self->_private;
491 460         646 local $_ = $_[1];
492              
493 460 100       738 if( $stash->{encode_amp} ){
    50          
494 440 100       632 if( exists($stash->{html_encode_chars}) ){
495             # Escape all amps for later processing.
496             # Pass intermediate strings to entity encoder so that it doesn't
497             # process any of the characters of our escape sequences.
498             # Use -1 to get "as many fields as possible" so that we keep leading and
499             # trailing (possibly empty) fields.
500 38         109 $_ = join $_escape{amp}, map { $self->encode_entities($_) } split /&/, $_, -1;
  57         621  
501             }
502             else {
503 402         680 s/&/$_escape{amp}/g;
504             }
505             }
506             elsif( exists($stash->{html_encode_chars}) ){
507 20         33 $_ = $self->encode_entities($_);
508             }
509              
510             s/</$_escape{lt}/g
511 460 100       2181 if $stash->{encode_lt};
512              
513 460         782 return $_;
514             }
515              
516             # From Markdown.pl version 1.0.1 line 1172 (_DoAutoLinks).
517             my $EMAIL_MARKER = qr{
518             # < # Opening token is in parent regexp.
519             (?:mailto:)?
520             (
521             [-.\w]+
522             \@
523             [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
524             )
525             >
526             }x;
527              
528             # Process any escapes we put in the text earlier,
529             # now that the text is complete (end of a block).
530             sub _process_escapes {
531 494     494   587 my $self = $_[0];
532 494         616 my $stash = $self->_private;
533 494         674 local $_ = $_[1];
534              
535             # The patterns below are taken from Markdown.pl 1.0.1 _EncodeAmpsAndAngles().
536             # In this case we only want to encode the ones that Markdown won't.
537             # This is overkill but produces nicer looking text (less escaped entities).
538             # If it proves insufficent then we'll just encode them all.
539              
540             # $1: If the escape was in a code sequence, simply replace the original.
541             # $2: If the unescaped value would be followed by characters
542             # that could be interpreted as html, entity-encode it.
543             # else: The character is safe to leave bare.
544              
545             # Neither currently allows $2 to contain '0' so bool tests are sufficient.
546              
547 494 100       1247 if( $stash->{encode_amp} ){
548             # Encode & if succeeded by chars that look like an html entity.
549 461         1309 s,$_escape{amp_re}((?:#?[xX]?(?:[0-9a-fA-F]+|\w+);)?),
550 90 100       354 $1 ? '&'.$2 : $2 ? '&amp;'.$2 : '&',egos;
    100          
551             }
552              
553 494 100       914 if( $stash->{encode_lt} ){
554             # Encode < if succeeded by chars that look like an html tag.
555             # Leave email addresses (<foo@bar.com>) for Markdown to process.
556 461         1831 s,$_escape{lt_re}((?=$EMAIL_MARKER)|(?:[a-z/?\$!])?),
557 72 100       299 $1 ? '<'.$2 : $2 ? '&lt;'.$2 : '<',egos;
    100          
558             }
559              
560 494         816 return $_;
561             }
562              
563              
564             ## Parsing ##
565              
566             sub handle_text {
567 543     543 0 5677 my $self = $_[0];
568 543         837 my $stash = $self->_private;
569 543         834 local $_ = $_[1];
570              
571             # Unless we're in a code span, verbatim block, or formatted region.
572 543 100       948 unless( $stash->{no_escape} ){
573              
574             # We could, in theory, alter what gets escaped according to context
575             # (for example, escape square brackets (but not parens) inside link text).
576             # The markdown produced might look slightly nicer but either way you're
577             # at the whim of the markdown processor to interpret things correctly.
578             # For now just escape everything.
579              
580             # Don't let literal characters be interpreted as markdown.
581 460         832 $_ = $self->_escape_inline_markdown($_);
582              
583             # Entity-encode (or escape for later processing) necessary/desired chars.
584 460         790 $_ = $self->_encode_or_escape_entities($_);
585              
586             }
587             # If this _is_ a code section, do limited/specific handling.
588             else {
589             # Always escaping these chars ensures that we won't mangle the text
590             # in the unlikely event that a sequence matching our escape occurred in the
591             # input stream (since we're going to escape it and then unescape it).
592 83 100       273 s/&/$_escape{amp_code}/gos if $stash->{encode_amp};
593 83 100       252 s/</$_escape{lt_code}/gos if $stash->{encode_lt};
594             }
595              
596 543         1064 $self->_save($_);
597             }
598              
599             sub start_Document {
600 290     290 0 87147 my ($self) = @_;
601 290         569 $self->_new_stack;
602             }
603              
604             sub end_Document {
605 290     290 0 18270 my ($self) = @_;
606 290         677 $self->_check_search_header;
607 290         350 my $end = pop @{ $self->_private->{stacks} };
  290         386  
608              
609 290 50       328 @{ $self->_private->{stacks} } == 0
  290         466  
610             or die 'Document ended with stacks remaining';
611              
612 290         831 my @doc = $self->_chomp_all(join('', @$end)) . $/;
613              
614 290 100       689 if( $self->include_meta_tags ){
615 6         37 unshift @doc, $self->_build_markdown_head, ($/ x 2);
616             }
617              
618 290 100       1768 if( my $encoding = $self->_get_output_encoding ){
619             # Do the check outside the loop(s) for efficiency.
620 55 100       392 my $ents = $HAS_HTML_ENTITIES ? \&__entity_encode_ord_he : \&__entity_encode_ord_basic;
621             # Iterate indices to avoid copying large strings.
622 55         129 for my $i ( 0 .. $#doc ){
623 55         99 print { $self->{output_fh} } Encode::encode($encoding, $doc[$i], $ents);
  55         143  
624             }
625             }
626             else {
627 235         1033 print { $self->{output_fh} } @doc;
  235         784  
628             }
629             }
630              
631             sub _get_output_encoding {
632 290     290   405 my ($self) = @_;
633              
634             # If 'match_encoding' is set we need to return an encoding.
635             # If pod has no =encoding, Pod::Simple will guess if it sees a high-bit char.
636             # If there are no high-bit chars, encoding is undef.
637             # Use detected_encoding() rather than encoding() because if Pod::Simple
638             # can't use whatever encoding was specified, we probably can't either.
639             # Fallback to 'o_e' if no match is found. This gives the user the choice,
640             # since otherwise there would be no reason to specify 'o_e' *and* 'm_e'.
641             # Fallback to UTF-8 since it is a reasonable default these days.
642              
643 290 100 100     531 return $self->detected_encoding || $self->output_encoding || 'UTF-8'
644             if $self->match_encoding;
645              
646             # If output encoding wasn't specified, return false.
647 278         1353 return $self->output_encoding;
648             }
649              
650             ## Blocks ##
651              
652             sub start_Verbatim {
653 17     17 0 4056 my ($self) = @_;
654 17         54 $self->_new_stack;
655 17         32 $self->_private->{no_escape} = 1;
656             }
657              
658             sub end_Verbatim {
659 17     17 0 130 my ($self) = @_;
660              
661 17         36 my $text = $self->_pop_stack_text;
662              
663 17         54 $text = $self->_indent_verbatim($text);
664              
665 17         41 $self->_private->{no_escape} = 0;
666              
667             # Verbatim blocks do not generate a separate "Para" event.
668 17         33 $self->_save_block($text);
669             }
670              
671             sub _indent_verbatim {
672 17     17   29 my ($self, $paragraph) = @_;
673              
674             # NOTE: Pod::Simple expands the tabs for us (as suggested by perlpodspec).
675             # Pod::Simple also has a 'strip_verbatim_indent' attribute
676             # but it doesn't sound like it gains us anything over this method.
677              
678             # POD verbatim can start with any number of spaces (or tabs)
679             # markdown should be 4 spaces (or a tab)
680             # so indent any paragraphs so that all lines start with at least 4 spaces
681 17         52 my @lines = split /\n/, $paragraph;
682 17         24 my $indent = ' ' x 4;
683 17         88 foreach my $line ( @lines ){
684 32 100       92 next unless $line =~ m/^( +)/;
685             # find the smallest indentation
686 31 100       186 $indent = $1 if length($1) < length($indent);
687             }
688 17 100       39 if( (my $smallest = length($indent)) < 4 ){
689             # invert to get what needs to be prepended
690 11         24 $indent = ' ' x (4 - $smallest);
691              
692             # Prepend indent to each line.
693             # We could check /\S/ to only indent non-blank lines,
694             # but it's backward compatible to respect the whitespace.
695             # Additionally, both pod and markdown say they ignore blank lines
696             # so it shouldn't hurt to leave them in.
697 11 100       20 $paragraph = join "\n", map { length($_) ? $indent . $_ : '' } @lines;
  23         69  
698             }
699              
700 17         37 return $paragraph;
701             }
702              
703             sub start_Para {
704 342     342 0 121983 $_[0]->_new_stack;
705             }
706              
707             sub end_Para {
708 342     342 0 2888 my ($self) = @_;
709 342         482 my $text = $self->_pop_stack_text;
710              
711 342         633 $text = $self->_escape_paragraph_markdown($text);
712              
713 342         575 $self->_save_block($text);
714             }
715              
716              
717             ## Headings ##
718              
719 40     40 0 5069 sub start_head1 { $_[0]->_start_head(1) }
720 40     40 0 348 sub end_head1 { $_[0]->_end_head(1) }
721 11     11 0 3823 sub start_head2 { $_[0]->_start_head(2) }
722 11     11 0 104 sub end_head2 { $_[0]->_end_head(2) }
723 1     1 0 334 sub start_head3 { $_[0]->_start_head(3) }
724 1     1 0 18 sub end_head3 { $_[0]->_end_head(3) }
725 0     0 0 0 sub start_head4 { $_[0]->_start_head(4) }
726 0     0 0 0 sub end_head4 { $_[0]->_end_head(4) }
727              
728             sub _check_search_header {
729 342     342   597 my ($self) = @_;
730             # Save the text since the last heading if we want it for metadata.
731 342 100       484 if( my $last = $self->_private->{search_header} ){
732 18         27 for( $self->_private->{$last} = $self->_last_string ){
733 18         35 s/\A\s+//;
734 18         81 s/\s+\z//;
735             }
736             }
737             }
738             sub _start_head {
739 52     52   82 my ($self) = @_;
740 52         110 $self->_check_search_header;
741 52         96 $self->_new_stack;
742             }
743              
744             sub _end_head {
745 52     52   86 my ($self, $num) = @_;
746 52         89 my $h = '#' x $num;
747              
748 52         91 my $text = $self->_pop_stack_text;
749             $self->_private->{search_header} =
750 52 100       185 $text =~ /NAME/ ? 'Title'
    100          
751             : $text =~ /AUTHOR/ ? 'Author'
752             : undef;
753              
754             # TODO: option for $h suffix
755             # TODO: put a name="" if $self->{embed_anchor_tags}; ?
756             # https://rt.cpan.org/Ticket/Display.html?id=57776
757 52         132 $self->_save_block(join(' ', $h, $text));
758             }
759              
760             ## Lists ##
761              
762             # With Pod::Simple->parse_empty_lists(1) there could be an over_empty event,
763             # but what would you do with that?
764              
765             sub _start_list {
766 22     22   36 my ($self) = @_;
767 22         46 $self->_new_stack;
768              
769             # Nest again b/c start_item will pop this to look for preceding content.
770 22         47 $self->_increase_indent;
771 22         44 $self->_new_stack;
772             }
773              
774             sub _end_list {
775 22     22   38 my ($self) = @_;
776 22         38 $self->_handle_between_item_content;
777              
778             # Finish the list.
779              
780             # All the child elements should be blocks,
781             # but don't end with a double newline.
782 22         36 my $text = $self->_chomp_all($self->_pop_stack_text);
783              
784 22         63 $_[0]->_save_line($text . $/);
785             }
786              
787             sub _handle_between_item_content {
788 65     65   311 my ($self) = @_;
789              
790             # This might be empty (if the list item had no additional content).
791 65 100       125 if( my $text = $self->_pop_stack_text ){
792             # Else it's a sub-document.
793             # If there are blocks we need to separate with blank lines.
794 21 100       50 if( $self->_private->{last_state}->{blocks} ){
795 16         37 $text = $/ . $text;
796             }
797             # If not, we can condense the text.
798             # In this module's history there was a patch contributed to specifically
799             # produce "huddled" lists so we'll try to maintain that functionality.
800             else {
801 5         12 $text = $self->_chomp_all($text) . $/;
802             }
803 21         37 $self->_save($text)
804             }
805              
806 65         110 $self->_decrease_indent;
807             }
808              
809             sub _start_item {
810 43     43   59 my ($self) = @_;
811 43         86 $self->_handle_between_item_content;
812 43         69 $self->_new_stack;
813             }
814              
815             sub _end_item {
816 43     43   77 my ($self, $marker) = @_;
817 43         87 my $text = $self->_pop_stack_text;
818 43 100 66     209 $self->_save_line($self->_indent($marker .
819             # Add a space only if there is text after the marker.
820             (defined($text) && length($text) ? ' ' . $text : '')
821             ));
822              
823             # Store any possible contents in a new stack (like a sub-document).
824 43         95 $self->_increase_indent;
825 43         69 $self->_new_stack;
826             }
827              
828 7     7 0 1379 sub start_over_bullet { $_[0]->_start_list }
829 7     7 0 529 sub end_over_bullet { $_[0]->_end_list }
830              
831 18     18 0 4849 sub start_item_bullet { $_[0]->_start_item }
832 18     18 0 145 sub end_item_bullet { $_[0]->_end_item('-') }
833              
834 9     9 0 1895 sub start_over_number { $_[0]->_start_list }
835 9     9 0 1129 sub end_over_number { $_[0]->_end_list }
836              
837             sub start_item_number {
838 18     18 0 5463 $_[0]->_start_item;
839             # It seems like this should be a stack,
840             # but from testing it appears that the corresponding 'end' event
841             # comes right after the text (it doesn't surround any embedded content).
842             # See t/nested.t which shows start-item, text, end-item, para, start-item....
843 18         38 $_[0]->_private->{item_number} = $_[1]->{number};
844             }
845              
846             sub end_item_number {
847 18     18 0 150 my ($self) = @_;
848 18         26 $self->_end_item($self->_private->{item_number} . '.');
849             }
850              
851             # Markdown doesn't support definition lists
852             # so do regular (unordered) lists with indented paragraphs.
853 6     6 0 1267 sub start_over_text { $_[0]->_start_list }
854 6     6 0 617 sub end_over_text { $_[0]->_end_list }
855              
856 7     7 0 1759 sub start_item_text { $_[0]->_start_item }
857 7     7 0 59 sub end_item_text { $_[0]->_end_item('-')}
858              
859              
860             # perlpodspec equates an over/back region with no items to a blockquote.
861             sub start_over_block {
862             # NOTE: We don't actually need to indent for a blockquote.
863 3     3 0 1096 $_[0]->_new_stack;
864             }
865              
866             sub end_over_block {
867 3     3 0 491 my ($self) = @_;
868              
869             # Chomp first to avoid prefixing a blank line with a `>`.
870 3         7 my $text = $self->_chomp_all($self->_pop_stack_text);
871              
872             # NOTE: Paragraphs will already be escaped.
873              
874             # I don't really like either of these implementations
875             # but the join/map/split seems a little better and benches a little faster.
876             # You would lose the last newline but we've already chomped.
877             #$text =~ s{^(.)?}{'>' . (defined($1) && length($1) ? (' ' . $1) : '')}mge;
878 3 100       121 $text = join $/, map { length($_) ? '> ' . $_ : '>' } split qr-$/-, $text;
  22         51  
879              
880 3         13 $self->_save_block($text);
881             }
882              
883             ## Custom Formats ##
884              
885             sub start_for {
886 10     10 0 2769 my ($self, $attr) = @_;
887 10         22 $self->_new_stack;
888              
889 10 100       29 if( $attr->{target} eq 'html' ){
890             # Use another stack so we can indent
891             # (not syntactily necessary but seems appropriate).
892 2         80 $self->_new_stack;
893 2         6 $self->_increase_indent;
894 2         3 $self->_private->{no_escape} = 1;
895             # Mark this so we know to undo it.
896 2         5 $self->_stack_state->{for_html} = 1;
897             }
898             }
899              
900             sub end_for {
901 10     10 0 972 my ($self) = @_;
902             # Data gets saved as a block (which will handle indents),
903             # but if there was html we'll alter this, so chomp and save a block again.
904 10         20 my $text = $self->_chomp_all($self->_pop_stack_text);
905              
906 10 100       23 if( $self->_private->{last_state}->{for_html} ){
907 2         4 $self->_private->{no_escape} = 0;
908             # Save it to the next stack up so we can pop it again (we made two stacks).
909 2         5 $self->_save($text);
910 2         91 $self->_decrease_indent;
911 2         5 $text = join "\n", '<div>', $self->_chomp_all($self->_pop_stack_text), '</div>';
912             }
913              
914 10         16 $self->_save_block($text);
915             }
916              
917             # Data events will be emitted for any formatted regions that have been enabled
918             # (by default, `markdown` and `html`).
919              
920             sub start_Data {
921 5     5 0 372 my ($self) = @_;
922             # TODO: limit this to what's in attr?
923 5         10 $self->_private->{no_escape}++;
924 5         9 $self->_new_stack;
925             }
926              
927             sub end_Data {
928 5     5 0 38 my ($self) = @_;
929 5         9 my $text = $self->_pop_stack_text;
930 5         9 $self->_private->{no_escape}--;
931 5         9 $self->_save_block($text);
932             }
933              
934             ## Codes ##
935              
936 94     94 0 707 sub start_B { $_[0]->_save('**') }
937 47     47 0 395 sub end_B { $_[0]->start_B() }
938              
939 36     36 0 286 sub start_I { $_[0]->_save('_') }
940 18     18 0 284 sub end_I { $_[0]->start_I() }
941              
942             sub start_C {
943 61     61 0 944 my ($self) = @_;
944 61         117 $self->_new_stack;
945 61         92 $self->_private->{no_escape}++;
946             }
947              
948             sub end_C {
949 61     61 0 491 my ($self) = @_;
950 61         87 $self->_private->{no_escape}--;
951 61         109 $self->_save( $self->_wrap_code_span($self->_pop_stack_text) );
952             }
953              
954             # Use code spans for F<>.
955 4     4 0 56 sub start_F { shift->start_C(@_); }
956 4     4 0 34 sub end_F { shift ->end_C(@_); }
957              
958             sub start_L {
959 92     92 0 1241 my ($self, $flags) = @_;
960 92         159 $self->_new_stack;
961 92         116 push @{ $self->_private->{link} }, $flags;
  92         129  
962             }
963              
964             sub end_L {
965 92     92 0 984 my ($self) = @_;
966 92 50       101 my $flags = pop @{ $self->_private->{link} }
  92         162  
967             or die 'Invalid state: link end with no link start';
968              
969 92         140 my ($type, $to, $section) = @{$flags}{qw( type to section )};
  92         180  
970              
971 92 50       268 my $url = (
    100          
    100          
972             $type eq 'url' ? $to
973             : $type eq 'man' ? $self->format_man_url($to, $section)
974             : $type eq 'pod' ? $self->format_perldoc_url($to, $section)
975             : undef
976             );
977              
978 92         151 my $text = $self->_pop_stack_text;
979              
980             # NOTE: I don't think the perlpodspec says what to do with L<|blah>
981             # but it seems like a blank link text just doesn't make sense
982 92 100       174 if( !length($text) ){
983 4 100       9 $text =
    100          
984             $section ?
985             $to ? sprintf('"%s" in %s', $section, $to)
986             : ('"' . $section . '"')
987             : $to;
988             }
989              
990             # FIXME: What does Pod::Simple::X?HTML do for this?
991             # if we don't know how to handle the url just print the pod back out
992 92 50       236 if (!$url) {
993 0         0 $self->_save(sprintf 'L<%s>', $flags->{raw});
994 0         0 return;
995             }
996              
997             # In the url we need to escape quotes and parentheses lest markdown
998             # break the url (cut it short and/or wrongfully interpret a title).
999              
1000             # Backslash escapes do not work for the space and quotes.
1001             # URL-encoding the space is not sufficient
1002             # (the quotes confuse some parsers and produce invalid html).
1003             # I've arbitratily chosen HTML encoding to hide them from markdown
1004             # while mangling the url as litle as possible.
1005 92         600 $url =~ s/([ '"])/sprintf '&#x%x;', ord($1)/ge;
  11         50  
1006              
1007             # We also need to double any backslashes that may be present
1008             # (lest they be swallowed up) and stop parens from breaking the url.
1009 92         401 $url =~ s/([\\()])/\\$1/g;
1010              
1011             # TODO: put section name in title if not the same as $text
1012 92         419 $self->_save('[' . $text . '](' . $url . ')');
1013             }
1014              
1015             sub start_X {
1016 1     1 0 15 $_[0]->_new_stack;
1017             }
1018              
1019             sub end_X {
1020 1     1 0 10 my ($self) = @_;
1021 1         3 my $text = $self->_pop_stack_text;
1022             # TODO: mangle $text?
1023             # TODO: put <a name="$text"> if configured
1024             }
1025              
1026             # A code span can be delimited by multiple backticks (and a space)
1027             # similar to pod codes (C<< code >>), so ensure we use a big enough
1028             # delimiter to not have it broken by embedded backticks.
1029             sub _wrap_code_span {
1030 61     61   112 my ($self, $arg) = @_;
1031 61         82 my $longest = 0;
1032 61         161 while( $arg =~ /([`]+)/g ){
1033 5         10 my $len = length($1);
1034 5 100       16 $longest = $len if $longest < $len;
1035             }
1036 61         117 my $delim = '`' x ($longest + 1);
1037 61 100       124 my $pad = $longest > 0 ? ' ' : '';
1038 61         167 return $delim . $pad . $arg . $pad . $delim;
1039             }
1040              
1041             ## Link Formatting (TODO: Move this to another module) ##
1042              
1043              
1044             sub format_man_url {
1045 6     6 1 13 my ($self, $to) = @_;
1046 6         12 my ($page, $part) = ($to =~ /^ ([^(]+) (?: \( (\S+) \) )? /x);
1047 6   50     186 return $self->man_url_prefix . ($part || 1) . '/' . ($page || $to);
      33        
1048             }
1049              
1050              
1051             sub format_perldoc_url {
1052 69     69 1 113 my ($self, $name, $section) = @_;
1053              
1054 69         218 my $url_prefix = $self->perldoc_url_prefix;
1055 69 100 100     187 if (
      100        
1056             defined($name)
1057             && $self->is_local_module($name)
1058             && defined($self->local_module_url_prefix)
1059             ) {
1060 4         7 $url_prefix = $self->local_module_url_prefix;
1061             }
1062              
1063 69         1269 my $url = '';
1064              
1065             # If the link is to another module (external link).
1066 69 100       130 if ($name) {
1067 58         1217 $url = $url_prefix . URI::Escape::uri_escape($name);
1068             }
1069              
1070             # See https://rt.cpan.org/Ticket/Display.html?id=57776
1071             # for a discussion on the need to mangle the section.
1072 69 100       2081 if ($section){
1073              
1074 42 100       1084 my $method = $url
1075             # If we already have a prefix on the url it's external.
1076             ? $self->perldoc_fragment_format
1077             # Else an internal link points to this markdown doc.
1078             : $self->markdown_fragment_format;
1079              
1080 42 100       251 $method = 'format_fragment_' . $method
1081             unless ref($method);
1082              
1083             {
1084             # Set topic to enable code refs to be simple.
1085 42         53 local $_ = $section;
  42         52  
1086 42         89 $section = $self->$method($section);
1087             }
1088              
1089 42         116 $url .= '#' . $section;
1090             }
1091              
1092 69         380 return $url;
1093             }
1094              
1095              
1096             # TODO: simple, pandoc, etc?
1097              
1098             sub format_fragment_markdown {
1099 3     3 1 6 my ($self, $section) = @_;
1100              
1101             # If this is an internal link (to another section in this doc)
1102             # we can't be sure what the heading id's will look like
1103             # (it depends on what is rendering the markdown to html)
1104             # but we can try to follow popular conventions.
1105              
1106             # http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html#header-identifiers-in-html-latex-and-context
1107             #$section =~ s/(?![-_.])[[:punct:]]//g;
1108             #$section =~ s/\s+/-/g;
1109 3         5 $section =~ s/\W+/-/g;
1110 3         104 $section =~ s/-+$//;
1111 3         4 $section =~ s/^-+//;
1112 3         7 $section = lc $section;
1113             #$section =~ s/^[^a-z]+//;
1114 3   50     7 $section ||= 'section';
1115              
1116 3         5 return $section;
1117             }
1118              
1119              
1120             {
1121             # From Pod::Simple::XHTML 3.28.
1122             # The strings gets passed through encode_entities() before idify().
1123             # If we don't do it here the substitutions below won't operate consistently.
1124              
1125             sub format_fragment_pod_simple_xhtml {
1126 9     9 1 10 my ($self, $t) = @_;
1127              
1128             # encode_entities {
1129             # We need to use the defaults in case html_encode_chars has been customized
1130             # (since the purpose is to match what external sources are doing).
1131              
1132 9         15 local $self->_private->{html_encode_chars};
1133 9         19 $t = $self->encode_entities($t);
1134             # }
1135              
1136             # idify {
1137 9         282 for ($t) {
1138 9         18 s/<[^>]+>//g; # Strip HTML.
1139 9         79 s/&[^;]+;//g; # Strip entities.
1140 9         74 s/^\s+//; s/\s+$//; # Strip white space.
  9         77  
1141 9         129 s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
1142 9         92 s/^[^a-zA-Z]+//; # First char must be a letter.
1143 9         73 s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
1144 9         119 s/[-:.]+$//; # Strip trailing punctuation.
1145             }
1146             # }
1147              
1148 9         41 return $t;
1149             }
1150             }
1151              
1152              
1153             sub format_fragment_pod_simple_html {
1154 9     9 1 14 my ($self, $section) = @_;
1155              
1156             # From Pod::Simple::HTML 3.28.
1157              
1158             # section_name_tidy {
1159 9         15 $section =~ s/^\s+//;
1160 9         128 $section =~ s/\s+$//;
1161 9         160 $section =~ tr/ /_/;
1162 9         224 $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
1163              
1164             #$section = $self->unicode_escape_url($section);
1165             # unicode_escape_url {
1166 9         17 $section =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
  0         0  
1167             # Turn char 1234 into "(1234)"
1168             # }
1169              
1170 9 50       15 $section = '_' unless length $section;
1171 9         19 return $section;
1172             # }
1173             }
1174              
1175              
1176 9     9 1 19 sub format_fragment_metacpan { shift->format_fragment_pod_simple_xhtml(@_); }
1177 9     9 1 19 sub format_fragment_sco { shift->format_fragment_pod_simple_html(@_); }
1178              
1179              
1180             sub is_local_module {
1181 58     58 1 79 my ($self, $name) = @_;
1182              
1183 58         159 return ($name =~ $self->local_module_re);
1184             }
1185              
1186             1;
1187              
1188             __END__
1189              
1190             =pod
1191              
1192             =encoding UTF-8
1193              
1194             =for :stopwords Marcel Gruenauer Victor Moral Ryan C. Thompson <rct at thompsonclan d0t
1195             org> Aristotle Pagaltzis Randy Stauner ACKNOWLEDGEMENTS html cpan
1196             testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto
1197             metadata placeholders metacpan
1198              
1199             =head1 NAME
1200              
1201             Pod::Markdown - Convert POD to Markdown
1202              
1203             =head1 VERSION
1204              
1205             version 3.200
1206              
1207             =for test_synopsis my ($pod_string);
1208              
1209             =head1 SYNOPSIS
1210              
1211             # Pod::Simple API is supported.
1212              
1213             # Command line usage: Parse a pod file and print to STDOUT:
1214             # $ perl -MPod::Markdown -e 'Pod::Markdown->new->filter(@ARGV)' path/to/POD/file > README.md
1215              
1216             # Work with strings:
1217             my $markdown;
1218             my $parser = Pod::Markdown->new;
1219             $parser->output_string(\$markdown);
1220             $parser->parse_string_document($pod_string);
1221              
1222             # See Pod::Simple docs for more.
1223              
1224             =head1 DESCRIPTION
1225              
1226             This module uses L<Pod::Simple> to convert POD to Markdown.
1227              
1228             Literal characters in Pod that are special in Markdown
1229             (like *asterisks*) are backslash-escaped when appropriate.
1230              
1231             By default C<markdown> and C<html> formatted regions are accepted.
1232             Regions of C<markdown> will be passed through unchanged.
1233             Regions of C<html> will be placed inside a C<< E<lt>divE<gt> >> tag
1234             so that markdown characters won't be processed.
1235             Regions of C<:markdown> or C<:html> will be processed as POD and included.
1236             To change which regions are accepted use the L<Pod::Simple> API:
1237              
1238             my $parser = Pod::Markdown->new;
1239             $parser->unaccept_targets(qw( markdown html ));
1240              
1241             =head2 A note on encoding and escaping
1242              
1243             The common L<Pod::Simple> API returns a character string.
1244             If you want Pod::Markdown to return encoded octets, there are two attributes
1245             to assist: L</match_encoding> and L</output_encoding>.
1246              
1247             When an output encoding is requested any characters that are not valid
1248             for that encoding will be escaped as HTML entities.
1249              
1250             This is not 100% safe, however.
1251              
1252             Markdown escapes all ampersands inside of code spans, so escaping a character
1253             as an HTML entity inside of a code span will not be correct.
1254             However, with pod's C<S> and C<E> sequences it is possible
1255             to end up with high-bit characters inside of code spans.
1256              
1257             So, while C<< output_encoding => 'ascii' >> can work, it is not recommended.
1258             For these reasons (and more), C<UTF-8> is the default, fallback encoding (when one is required).
1259              
1260             If you prefer HTML entities over literal characters you can use
1261             L</html_encode_chars> which will only operate outside of code spans (where it is safe).
1262              
1263             =head1 METHODS
1264              
1265             =head2 new
1266              
1267             Pod::Markdown->new(%options);
1268              
1269             The constructor accepts the following named arguments:
1270              
1271             =over 4
1272              
1273             =item *
1274              
1275             C<local_module_url_prefix>
1276              
1277             Alters the perldoc urls that are created from C<< LE<lt>E<gt> >> codes
1278             when the module is a "local" module (C<"Local::*"> or C<"Foo_Corp::*"> (see L<perlmodlib>)).
1279              
1280             The default is to use C<perldoc_url_prefix>.
1281              
1282             =item *
1283              
1284             C<local_module_re>
1285              
1286             Alternate regular expression for determining "local" modules.
1287             Default is C<< our $LOCAL_MODULE_RE = qr/^(Local::|\w*?_\w*)/ >>.
1288              
1289             =item *
1290              
1291             C<man_url_prefix>
1292              
1293             Alters the man page urls that are created from C<< LE<lt>E<gt> >> codes.
1294              
1295             The default is C<http://man.he.net/man>.
1296              
1297             =item *
1298              
1299             C<perldoc_url_prefix>
1300              
1301             Alters the perldoc urls that are created from C<< LE<lt>E<gt> >> codes.
1302             Can be:
1303              
1304             =over 4
1305              
1306             =item *
1307              
1308             C<metacpan> (shortcut for C<https://metacpan.org/pod/>)
1309              
1310             =item *
1311              
1312             C<sco> (shortcut for C<http://search.cpan.org/perldoc?>)
1313              
1314             =item *
1315              
1316             any url
1317              
1318             =back
1319              
1320             The default is C<metacpan>.
1321              
1322             Pod::Markdown->new(perldoc_url_prefix => 'http://localhost/perl/pod');
1323              
1324             =item *
1325              
1326             C<perldoc_fragment_format>
1327              
1328             Alters the format of the url fragment for any C<< LE<lt>E<gt> >> links
1329             that point to a section of an external document (C<< L<name/section> >>).
1330             The default will be chosen according to the destination L</perldoc_url_prefix>.
1331             Alternatively you can specify one of the following:
1332              
1333             =over 4
1334              
1335             =item *
1336              
1337             C<metacpan>
1338              
1339             =item *
1340              
1341             C<sco>
1342              
1343             =item *
1344              
1345             C<pod_simple_xhtml>
1346              
1347             =item *
1348              
1349             C<pod_simple_html>
1350              
1351             =item *
1352              
1353             A code ref
1354              
1355             =back
1356              
1357             The code ref can expect to receive two arguments:
1358             the parser object (C<$self>) and the section text.
1359             For convenience the topic variable (C<$_>) is also set to the section text:
1360              
1361             perldoc_fragment_format => sub { s/\W+/-/g; }
1362              
1363             =item *
1364              
1365             C<markdown_fragment_format>
1366              
1367             Alters the format of the url fragment for any C<< LE<lt>E<gt> >> links
1368             that point to an internal section of this document (C<< L</section> >>).
1369              
1370             Unfortunately the format of the id attributes produced
1371             by whatever system translates the markdown into html is unknown at the time
1372             the markdown is generated so we do some simple clean up.
1373              
1374             B<Note:> C<markdown_fragment_format> and C<perldoc_fragment_format> accept
1375             the same values: a (shortcut to a) method name or a code ref.
1376              
1377             =item *
1378              
1379             C<include_meta_tags>
1380              
1381             Specifies whether or not to print author/title meta tags at the top of the document.
1382             Default is false.
1383              
1384             =back
1385              
1386             =head2 html_encode_chars
1387              
1388             A string of characters to encode as html entities
1389             (using L<HTML::Entities/encode_entities> if available, falling back to numeric entities if not).
1390              
1391             Possible values:
1392              
1393             =over 4
1394              
1395             =item *
1396              
1397             A value of C<1> will use the default set of characters from L<HTML::Entities> (control chars, high-bit chars, and C<< <&>"' >>).
1398              
1399             =item *
1400              
1401             A false value will disable.
1402              
1403             =item *
1404              
1405             Any other value is used as a string of characters (like a regular expression character class).
1406              
1407             =back
1408              
1409             By default this is disabled and literal characters will be in the output stream.
1410             If you specify a desired L</output_encoding> any characters not valid for that encoding will be HTML entity encoded.
1411              
1412             B<Note> that Markdown requires ampersands (C<< & >>) and left angle brackets (C<< < >>)
1413             to be entity-encoded if they could otherwise be interpreted as html entities.
1414             If this attribute is configured to encode those characters, they will always be encoded.
1415             If not, the module will make an effort to only encode the ones required,
1416             so there will be less html noise in the output.
1417              
1418             =head2 match_encoding
1419              
1420             Boolean: If true, use the C<< =encoding >> of the input pod
1421             as the encoding for the output.
1422              
1423             If no encoding is specified, L<Pod::Simple> will guess the encoding
1424             if it sees a high-bit character.
1425              
1426             If no encoding is guessed (or the specified encoding is unusable),
1427             L</output_encoding> will be used if it was specified.
1428             Otherwise C<UTF-8> will be used.
1429              
1430             This attribute is not recommended
1431             but is provided for consistency with other pod converters.
1432              
1433             Defaults to false.
1434              
1435             =head2 output_encoding
1436              
1437             The encoding to use when writing to the output file handle.
1438              
1439             If neither this nor L</match_encoding> are specified,
1440             a character string will be returned in whatever L<Pod::Simple> output method you specified.
1441              
1442             =head2 local_module_re
1443              
1444             Returns the regular expression used to determine local modules.
1445              
1446             =head2 local_module_url_prefix
1447              
1448             Returns the url prefix in use for local modules.
1449              
1450             =head2 man_url_prefix
1451              
1452             Returns the url prefix in use for man pages.
1453              
1454             =head2 perldoc_url_prefix
1455              
1456             Returns the url prefix in use (after resolving shortcuts to urls).
1457              
1458             =head2 perldoc_fragment_format
1459              
1460             Returns the coderef or format name used to format a url fragment
1461             to a section in an external document.
1462              
1463             =head2 markdown_fragment_format
1464              
1465             Returns the coderef or format name used to format a url fragment
1466             to an internal section in this document.
1467              
1468             =head2 include_meta_tags
1469              
1470             Returns the boolean value indicating
1471             whether or not meta tags will be printed.
1472              
1473             =head2 format_man_url
1474              
1475             Used internally to create a url (using L</man_url_prefix>)
1476             from a string like C<man(1)>.
1477              
1478             =head2 format_perldoc_url
1479              
1480             # With $name and section being the two parts of L<name/section>.
1481             my $url = $parser->format_perldoc_url($name, $section);
1482              
1483             Used internally to create a url from
1484             the name (of a module or script)
1485             and a possible section (heading).
1486              
1487             The format of the url fragment (when pointing to a section in a document)
1488             varies depending on the destination url
1489             so L</perldoc_fragment_format> is used (which can be customized).
1490              
1491             If the module name portion of the link is blank
1492             then the section is treated as an internal fragment link
1493             (to a section of the generated markdown document)
1494             and L</markdown_fragment_format> is used (which can be customized).
1495              
1496             =head2 format_fragment_markdown
1497              
1498             Format url fragment for an internal link
1499             by replacing non-word characters with dashes.
1500              
1501             =head2 format_fragment_pod_simple_xhtml
1502              
1503             Format url fragment like L<Pod::Simple::XHTML/idify>.
1504              
1505             =head2 format_fragment_pod_simple_html
1506              
1507             Format url fragment like L<Pod::Simple::HTML/section_name_tidy>.
1508              
1509             =head2 format_fragment_metacpan
1510              
1511             Format fragment for L<metacpan.org>
1512             (uses L</format_fragment_pod_simple_xhtml>).
1513              
1514             =head2 format_fragment_sco
1515              
1516             Format fragment for L<search.cpan.org>
1517             (uses L</format_fragment_pod_simple_html>).
1518              
1519             =head2 is_local_module
1520              
1521             Uses C<local_module_re> to determine if passed module is a "local" module.
1522              
1523             =for Pod::Coverage parse_from_file
1524             parse_from_filehandle
1525              
1526             =for Pod::Coverage as_markdown
1527              
1528             =for Pod::Coverage handle_text
1529             end_.+
1530             start_.+
1531             encode_entities
1532              
1533             =head1 SEE ALSO
1534              
1535             =over 4
1536              
1537             =item *
1538              
1539             L<pod2markdown> - script included for command line usage
1540              
1541             =item *
1542              
1543             L<Pod::Simple> - Super class that handles Pod parsing
1544              
1545             =item *
1546              
1547             L<perlpod> - For writing POD
1548              
1549             =item *
1550              
1551             L<perlpodspec> - For parsing POD
1552              
1553             =item *
1554              
1555             L<http://daringfireball.net/projects/markdown/syntax> - Markdown spec
1556              
1557             =back
1558              
1559             =head1 SUPPORT
1560              
1561             =head2 Perldoc
1562              
1563             You can find documentation for this module with the perldoc command.
1564              
1565             perldoc Pod::Markdown
1566              
1567             =head2 Websites
1568              
1569             The following websites have more information about this module, and may be of help to you. As always,
1570             in addition to those websites please use your favorite search engine to discover more resources.
1571              
1572             =over 4
1573              
1574             =item *
1575              
1576             MetaCPAN
1577              
1578             A modern, open-source CPAN search engine, useful to view POD in HTML format.
1579              
1580             L<https://metacpan.org/release/Pod-Markdown>
1581              
1582             =back
1583              
1584             =head2 Bugs / Feature Requests
1585              
1586             Please report any bugs or feature requests by email to C<bug-pod-markdown at rt.cpan.org>, or through
1587             the web interface at L<https://rt.cpan.org/Public/Bug/Report.html?Queue=Pod-Markdown>. You will be automatically notified of any
1588             progress on the request by the system.
1589              
1590             =head2 Source Code
1591              
1592              
1593             L<https://github.com/rwstauner/Pod-Markdown>
1594              
1595             git clone https://github.com/rwstauner/Pod-Markdown.git
1596              
1597             =head1 AUTHORS
1598              
1599             =over 4
1600              
1601             =item *
1602              
1603             Marcel Gruenauer <marcel@cpan.org>
1604              
1605             =item *
1606              
1607             Victor Moral <victor@taquiones.net>
1608              
1609             =item *
1610              
1611             Ryan C. Thompson <rct at thompsonclan d0t org>
1612              
1613             =item *
1614              
1615             Aristotle Pagaltzis <pagaltzis@gmx.de>
1616              
1617             =item *
1618              
1619             Randy Stauner <rwstauner@cpan.org>
1620              
1621             =back
1622              
1623             =head1 CONTRIBUTORS
1624              
1625             =for stopwords Aristotle Pagaltzis Cindy Wang (CindyLinz) Graham Ollis Mike Covington motemen moznion Peter Vereshagin Ryan C. Thompson Yasutaka ATARASHI
1626              
1627             =over 4
1628              
1629             =item *
1630              
1631             Aristotle Pagaltzis <aristotle@cpan.org>
1632              
1633             =item *
1634              
1635             Cindy Wang (CindyLinz) <cindylinz@gmail.com>
1636              
1637             =item *
1638              
1639             Graham Ollis <plicease@cpan.org>
1640              
1641             =item *
1642              
1643             Mike Covington <mfcovington@gmail.com>
1644              
1645             =item *
1646              
1647             motemen <motemen@cpan.org>
1648              
1649             =item *
1650              
1651             moznion <moznion@cpan.org>
1652              
1653             =item *
1654              
1655             Peter Vereshagin <veresc@cpan.org>
1656              
1657             =item *
1658              
1659             Ryan C. Thompson <rthompson@cpan.org>
1660              
1661             =item *
1662              
1663             Yasutaka ATARASHI <yakex@cpan.org>
1664              
1665             =back
1666              
1667             =head1 COPYRIGHT AND LICENSE
1668              
1669             This software is copyright (c) 2011 by Randy Stauner.
1670              
1671             This is free software; you can redistribute it and/or modify it under
1672             the same terms as the Perl 5 programming language system itself.
1673              
1674             =cut