File Coverage

blib/lib/Pod/Markdown.pm

Criterion	Covered	Total	%
statement	392	400	98.0
branch	120	130	92.3
condition	27	37	72.9
subroutine	99	101	98.0
pod	11	59	18.6
total	649	727	89.2

line	stmt	bran	cond	sub	pod	time	code
1							# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
2							#
3							# This file is part of Pod-Markdown
4							#
5							# This software is copyright (c) 2011 by Randy Stauner.
6							#
7							# This is free software; you can redistribute it and/or modify it under
8							# the same terms as the Perl 5 programming language system itself.
9							#
10	16			16		2040835	use 5.008;
	16					181
11	16			16		81	use strict;
	16					39
	16					477
12	16			16		95	use warnings;
	16					29
	16					1440
13
14							package Pod::Markdown;
15							# git description: v3.101-3-g70682ef
16
17							our $AUTHORITY = 'cpan:RWSTAUNER';
18							# ABSTRACT: Convert POD to Markdown
19							$Pod::Markdown::VERSION = '3.200';
20	16			16		9092	use Pod::Simple 3.27 (); # detected_encoding and keep_encoding bug fix
	16					499098
	16					478
21	16			16		6783	use parent qw(Pod::Simple::Methody);
	16					4431
	16					108
22	16			16		19354	use Encode ();
	16					165828
	16					324
23	16			16		6122	use URI::Escape ();
	16					20910
	16					2486
24
25							our %URL_PREFIXES = (
26							sco => 'http://search.cpan.org/perldoc?',
27							metacpan => 'https://metacpan.org/pod/',
28							man => 'http://man.he.net/man',
29							);
30							$URL_PREFIXES{perldoc} = $URL_PREFIXES{metacpan};
31
32							our $LOCAL_MODULE_RE = qr/^(Local::\|\w?_\w)/;
33
34							## no critic
35							#{
36							our $HAS_HTML_ENTITIES;
37
38							# Stolen from Pod::Simple::XHTML 3.28. {{{
39
40							BEGIN {
41	16			16		1066	$HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
42							}
43
44							my %entities = (
45							q{>} => 'gt',
46							q{<} => 'lt',
47							q{'} => '#39',
48							q{"} => 'quot',
49							q{&} => 'amp',
50							);
51
52							sub encode_entities {
53	234			234	0	400	my $self = shift;
54	234					314	my $ents = $self->html_encode_chars;
55	234	100				534	return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
56	49	100				121	if (defined $ents) {
57	48					94	$ents =~ s,(?<!\\)([]/]),\\$1,g;
58	48					67	$ents =~ s,(?<!\\)\\\z,\\\\,;
59							} else {
60	1					6	$ents = join '', keys %entities;
61							}
62	49					61	my $str = $_[0];
63	49		66			239	$str =~ s/([$ents])/'&' . ($entities{$1} \|\| sprintf '#x%X', ord $1) . ';'/ge;
	23					126
64	49					145	return $str;
65							}
66
67							# }}}
68
69							# Add a few very common ones for consistency and readability
70							# (in case HTML::Entities isn't available).
71							%entities = (
72							# Pod::Markdown has always required 5.8 so unicode_to_native will be available.
73							chr(utf8::unicode_to_native(0xA0)) => 'nbsp',
74							chr(utf8::unicode_to_native(0xA9)) => 'copy',
75							%entities
76							);
77
78							sub __entity_encode_ord_he {
79	11			11		235	my $chr = chr $_[0];
80							# Skip the encode_entities() logic and go straight for the substitution
81							# since we already have the char we know we want replaced.
82							# Both the hash and the function are documented as exportable (so should be reliable).
83	11		33			48	return $HTML::Entities::char2entity{ $chr } \|\| HTML::Entities::num_entity( $chr );
84							}
85							sub __entity_encode_ord_basic {
86	7		33	7		225	return '&' . ($entities{chr $_[0]} \|\| sprintf '#x%X', $_[0]) . ';';
87							}
88
89							# From HTML::Entities 3.69
90							my $DEFAULT_ENTITY_CHARS = '^\n\r\t !\#\$%\(-;=?-~';
91
92							#}
93							## use critic
94
95							# Use hash for simple "exists" check in `new` (much more accurate than `->can`).
96							my %attributes = map { ($_ => 1) }
97							qw(
98							html_encode_chars
99							match_encoding
100							output_encoding
101							local_module_re
102							local_module_url_prefix
103							man_url_prefix
104							perldoc_url_prefix
105							perldoc_fragment_format
106							markdown_fragment_format
107							include_meta_tags
108							);
109
110
111							sub new {
112	303			303	1	192300	my $class = shift;
113	303					793	my %args = @_;
114
115	303					902	my $self = $class->SUPER::new();
116	303					5028	$self->preserve_whitespace(1);
117	303					2128	$self->nbsp_for_S(1);
118	303					1667	$self->accept_targets(qw( markdown html ));
119
120							# Default to the global, but allow it to be overwritten in args.
121	303					5113	$self->local_module_re($LOCAL_MODULE_RE);
122
123	303					1482	for my $type ( qw( perldoc man ) ){
124	606					916	my $attr = $type . '_url_prefix';
125							# Initialize to the alias.
126	606					1281	$self->$attr($type);
127							}
128
129	303					867	while( my ($attr, $val) = each %args ){
130							# NOTE: Checking exists on a private var means we don't allow Pod::Simple
131							# attributes to be set this way. It's not very consistent, but I think
132							# I'm ok with that for now since there probably aren't many Pod::Simple attributes
133							# being changed besides `output_*` which feel like API rather than attributes.
134							# We'll see.
135							# This is currently backward-compatible as we previously just put the attribute
136							# into the private stash so anything unknown was silently ignored.
137							# We could open this up to `$self->can($attr)` in the future if that seems better
138							# but it tricked me when I was testing a misspelled attribute name
139							# which also happened to be a Pod::Simple method.
140
141	906	100				3258	exists $attributes{ $attr } or
142							# Provide a more descriptive message than "Can't locate object method".
143							warn("Unknown argument to ${class}->new(): '$attr'"), next;
144
145							# Call setter.
146	904					1676	$self->$attr($val);
147							}
148
149							# TODO: call from the setters.
150	303					1503	$self->_prepare_fragment_formats;
151
152	303					762	return $self;
153							}
154
155							for my $type ( qw( local_module perldoc man ) ){
156							my $attr = $type . '_url_prefix';
157	16			16		118	no strict 'refs'; ## no critic
	16					31
	16					106564
158							*$attr = sub {
159	1000			1000		12734	my $self = shift;
160	1000	100				1664	if (@_) {
161	847		100			3072	$self->{$attr} = $URL_PREFIXES{ $_[0] } \|\| $_[0];
162							}
163							else {
164	153					364	return $self->{$attr};
165							}
166							}
167							}
168
169							## Attribute accessors ##
170
171
172							sub html_encode_chars {
173	305			305	1	344	my $self = shift;
174	305					403	my $stash = $self->_private;
175
176							# Setter.
177	305	100				495	if( @_ ){
178							# If false ('', 0, undef), disable.
179	71	50				135	if( !$_[0] ){
180	0					0	delete $stash->{html_encode_chars};
181	0					0	$stash->{encode_amp} = 1;
182	0					0	$stash->{encode_lt} = 1;
183							}
184							else {
185							# Special case boolean '1' to mean "all".
186							# If we have HTML::Entities, undef will use the default.
187							# Without it, we need to specify so that we use the same list (for consistency).
188	71	100				157	$stash->{html_encode_chars} = $_[0] eq '1' ? ($HAS_HTML_ENTITIES ? undef : $DEFAULT_ENTITY_CHARS) : $_[0];
		100
189
190							# If [char] doesn't get encoded, we need to do it ourselves.
191	71					116	$stash->{encode_amp} = ($self->encode_entities('&') eq '&');
192	71					2172	$stash->{encode_lt} = ($self->encode_entities('<') eq '<');
193							}
194	71					1538	return;
195							}
196
197							# Getter.
198	234					320	return $stash->{html_encode_chars};
199							}
200
201
202							# I prefer ro-accessors (immutability!) but it can be confusing
203							# to not support the same API as other Pod::Simple classes.
204
205							# NOTE: Pod::Simple::_accessorize is not a documented public API.
206							# Skip any that have already been defined.
207							__PACKAGE__->_accessorize(grep { !__PACKAGE__->can($_) } keys %attributes);
208
209							sub _prepare_fragment_formats {
210	303			303		424	my ($self) = @_;
211
212	303					739	foreach my $attr ( keys %attributes ){
213	3030	100				6998	next unless $attr =~ /^(\w+)_fragment_format/;
214	606					1100	my $type = $1;
215	606					1211	my $format = $self->$attr;
216
217							# If one was provided.
218	606	100				3039	if( $format ){
219							# If the attribute is a coderef just use it.
220	510	100				1085	next if ref($format) eq 'CODE';
221							}
222							# Else determine a default.
223							else {
224	96	100				205	if( $type eq 'perldoc' ){
225							# Choose a default that matches the destination url.
226	47					72	my $target = $self->perldoc_url_prefix;
227	47					83	foreach my $alias ( qw( metacpan sco ) ){
228	94	100				212	if( $target eq $URL_PREFIXES{ $alias } ){
229	38					52	$format = $alias;
230							}
231							}
232							# This seems like a reasonable fallback.
233	47		100			95	$format \|\|= 'pod_simple_xhtml';
234							}
235							else {
236	49					66	$format = $type;
237							}
238							}
239
240							# The short name should become a method name with the prefix prepended.
241	142					163	my $prefix = 'format_fragment_';
242	142					406	$format =~ s/^$prefix//;
243	142	50				523	die "Unknown fragment format '$format'"
244							unless $self->can($prefix . $format);
245
246							# Save it.
247	142					284	$self->$attr($format);
248							}
249
250	303					575	return;
251							}
252
253							## Backward compatible API ##
254
255							# For backward compatibility (previously based on Pod::Parser):
256							# While Pod::Simple provides a parse_from_file() method
257							# it's primarily for Pod::Parser compatibility.
258							# When called without an output handle it will print to STDOUT
259							# but the old Pod::Markdown never printed to a handle
260							# so we don't want to start now.
261							sub parse_from_file {
262	10			10	1	17	my ($self, $file) = @_;
263
264							# TODO: Check that all dependent cpan modules use the Pod::Simple API
265							# then add a deprecation warning here to avoid confusion.
266
267	10					40	$self->output_string(\($self->{_as_markdown_}));
268	10					2026	$self->parse_file($file);
269							}
270
271							# Likewise, though Pod::Simple doesn't define this method at all.
272	9			9	0	356	sub parse_from_filehandle { shift->parse_from_file(@_) }
273
274
275							## Document state ##
276
277							sub _private {
278	9786			9786		20423	my ($self) = @_;
279							$self->{_Pod_Markdown_} \|\|= {
280	9786		100			26551	indent => 0,
281							stacks => [],
282							states => [{}],
283							link => [],
284							encode_amp => 1,
285							encode_lt => 1,
286							};
287							}
288
289							sub _increase_indent {
290	67	50		67		95	++$_[0]->_private->{indent} >= 1
291							or die 'Invalid state: indent < 0';
292							}
293							sub _decrease_indent {
294	67	50		67		100	--$_[0]->_private->{indent} >= 0
295							or die 'Invalid state: indent < 0';
296							}
297
298							sub _new_stack {
299	1005			1005		1170	push @{ $_[0]->_private->{stacks} }, [];
	1005					1516
300	1005					1244	push @{ $_[0]->_private->{states} }, {};
	1005					1405
301							}
302
303							sub _last_string {
304	18			18		26	$_[0]->_private->{stacks}->[-1][-1];
305							}
306
307							sub _pop_stack_text {
308	715			715		739	$_[0]->_private->{last_state} = pop @{ $_[0]->_private->{states} };
	715					1155
309	715					917	join '', @{ pop @{ $_[0]->_private->{stacks} } };
	715					730
	715					915
310							}
311
312							sub _stack_state {
313	431			431		575	$_[0]->_private->{states}->[-1];
314							}
315
316							sub _save {
317	1343			1343		2136	my ($self, $text) = @_;
318	1343					1357	push @{ $self->_private->{stacks}->[-1] }, $text;
	1343					2075
319							# return $text; # DEBUG
320							}
321
322							sub _save_line {
323	494			494		786	my ($self, $text) = @_;
324
325	494					859	$text = $self->_process_escapes($text);
326
327	494					1266	$self->_save($text . $/);
328							}
329
330							# For paragraphs, etc.
331							sub _save_block {
332	429			429		641	my ($self, $text) = @_;
333
334	429					701	$self->_stack_state->{blocks}++;
335
336	429					794	$self->_save_line($self->_indent($text) . $/);
337							}
338
339							## Formatting ##
340
341							sub _chomp_all {
342	332			332		499	my ($self, $text) = @_;
343	332					1032	1 while chomp $text;
344	332					806	return $text;
345							}
346
347							sub _indent {
348	472			472		644	my ($self, $text) = @_;
349	472					646	my $level = $self->_private->{indent};
350
351	472	100				782	if( $level ){
352	34					69	my $indent = ' ' x ($level * 4);
353
354							# Capture text on the line so that we don't indent blank lines (/^\x20{4}$/).
355	34					306	$text =~ s/^(.+)/$indent$1/mg;
356							}
357
358	472					1384	return $text;
359							}
360
361							# as_markdown() exists solely for backward compatibility
362							# and requires having called parse_from_file() to be useful.
363
364
365							sub as_markdown {
366	10			10	0	279	my ($parser, %args) = @_;
367	10					11	my @header;
368							# Don't add meta tags again if we've already done it.
369	10	100	100			39	if( $args{with_meta} && !$parser->include_meta_tags ){
370	3					22	@header = $parser->_build_markdown_head;
371							}
372	10					47	return join("\n" x 2, @header, $parser->{_as_markdown_});
373							}
374
375							sub _build_markdown_head {
376	9			9		12	my $parser = shift;
377	9					14	my $data = $parser->_private;
378							return join "\n",
379	12					58	map { qq![[meta \l$_="$data->{$_}"]]! }
380	9					17	grep { defined $data->{$_} }
	18					37
381							qw( Title Author );
382							}
383
384							## Escaping ##
385
386							# http://daringfireball.net/projects/markdown/syntax#backslash
387							# Markdown provides backslash escapes for the following characters:
388							#
389							# \ backslash
390							# ` backtick
391							# * asterisk
392							# _ underscore
393							# {} curly braces
394							# [] square brackets
395							# () parentheses
396							# # hash mark
397							# + plus sign
398							# - minus sign (hyphen)
399							# . dot
400							# ! exclamation mark
401
402							# However some of those only need to be escaped in certain places:
403							# * Backslashes do need to be escaped or they may be swallowed by markdown.
404							# * Word-surrounding characters (/[`_]/) do* need to be escaped mid-word
405							# because the markdown spec explicitly allows mid-word emphasis.
406							# * I don't actually see anything that curly braces are used for.
407							# * Escaping square brackets is enough to avoid accidentally
408							# creating links and images (so we don't need to escape plain parentheses
409							# or exclamation points as that would generate a lot of unnecesary noise).
410							# Parentheses will be escaped in urls (&end_L) to avoid premature termination.
411							# * We don't need a backslash for every hash mark or every hyphen found mid-word,
412							# just the ones that start a line (likewise for plus and dot).
413							# (Those will all be handled by _escape_paragraph_markdown).
414
415
416							# Backslash escape markdown characters to avoid having them interpreted.
417							sub _escape_inline_markdown {
418	460			460		612	local $_ = $_[1];
419
420							# s/([\\`*_{}\[\]()#+-.!])/\\$1/g; # See comments above.
421	460					946	s/([\\`*_\[\]])/\\$1/g;
422
423	460					748	return $_;
424							}
425
426							# Escape markdown characters that would be interpreted
427							# at the start of a line.
428							sub _escape_paragraph_markdown {
429	342			342		506	local $_ = $_[1];
430
431							# Escape headings, horizontal rules, (unordered) lists, and blockquotes.
432	342					686	s/^([-+#>])/\\$1/mg;
433
434							# Markdown doesn't support backslash escapes for equal signs
435							# even though they can be used to underline a header.
436							# So use html to escape them to avoid having them interpreted.
437	342					494	s/^([=])/sprintf '&#x%x;', ord($1)/mge;
	1					6
438
439							# Escape the dots that would wrongfully create numbered lists.
440	342					460	s/^( (?:>\s+)? \d+ ) (\.\x20)/$1\\$2/xgm;
441
442	342					493	return $_;
443							}
444
445
446							# Additionally Markdown allows inline html so we need to escape things that look like it.
447							# While _some_ Markdown processors handle backslash-escaped html,
448							# [Daring Fireball](http://daringfireball.net/projects/markdown/syntax) states distinctly:
449							# > In HTML, there are two characters that demand special treatment: < and &...
450							# > If you want to use them as literal characters, you must escape them as entities, e.g. <, and &.
451
452							# It goes on to say:
453							# > Markdown allows you to use these characters naturally,
454							# > taking care of all the necessary escaping for you.
455							# > If you use an ampersand as part of an HTML entity,
456							# > it remains unchanged; otherwise it will be translated into &.
457							# > Similarly, because Markdown supports inline HTML,
458							# > if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such.
459
460							# In order to only encode the occurrences that require it (something that
461							# could be interpreted as an entity) we escape them all so that we can do the
462							# suffix test later after the string is complete (since we don't know what
463							# strings might come after this one).
464
465							my %_escape =
466							map {
467							my ($k, $v) = split /:/;
468							# Put the "code" marker before the char instead of after so that it doesn't
469							# get confused as the $2 (which is what requires us to entity-encode it).
470							# ( "XsX", "XcsX", "X(c?)sX" )
471							my ($s, $code, $re) = map { "\0$_$v\0" } '', map { ($_, '('.$_.'?)') } 'c';
472
473							(
474							$k => $s,
475							$k.'_code' => $code,
476							$k.'_re' => qr/$re/,
477							)
478							}
479							qw( amp:& lt:< );
480
481							# Make the values of this private var available to the tests.
482	1			1		100	sub __escape_sequences { %_escape }
483
484
485							# HTML-entity encode any characters configured by the user.
486							# If that doesn't include [&<] then we escape those chars so we can decide
487							# later if we will entity-encode them or put them back verbatim.
488							sub _encode_or_escape_entities {
489	460			460		539	my $self = $_[0];
490	460					664	my $stash = $self->_private;
491	460					646	local $_ = $_[1];
492
493	460	100				738	if( $stash->{encode_amp} ){
		50
494	440	100				632	if( exists($stash->{html_encode_chars}) ){
495							# Escape all amps for later processing.
496							# Pass intermediate strings to entity encoder so that it doesn't
497							# process any of the characters of our escape sequences.
498							# Use -1 to get "as many fields as possible" so that we keep leading and
499							# trailing (possibly empty) fields.
500	38					109	$_ = join $_escape{amp}, map { $self->encode_entities($_) } split /&/, $_, -1;
	57					621
501							}
502							else {
503	402					680	s/&/$_escape{amp}/g;
504							}
505							}
506							elsif( exists($stash->{html_encode_chars}) ){
507	20					33	$_ = $self->encode_entities($_);
508							}
509
510							s/</$_escape{lt}/g
511	460	100				2181	if $stash->{encode_lt};
512
513	460					782	return $_;
514							}
515
516							# From Markdown.pl version 1.0.1 line 1172 (_DoAutoLinks).
517							my $EMAIL_MARKER = qr{
518							# < # Opening token is in parent regexp.
519							(?:mailto:)?
520							(
521							[-.\w]+
522							\@
523							[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
524							)
525							>
526							}x;
527
528							# Process any escapes we put in the text earlier,
529							# now that the text is complete (end of a block).
530							sub _process_escapes {
531	494			494		587	my $self = $_[0];
532	494					616	my $stash = $self->_private;
533	494					674	local $_ = $_[1];
534
535							# The patterns below are taken from Markdown.pl 1.0.1 _EncodeAmpsAndAngles().
536							# In this case we only want to encode the ones that Markdown won't.
537							# This is overkill but produces nicer looking text (less escaped entities).
538							# If it proves insufficent then we'll just encode them all.
539
540							# $1: If the escape was in a code sequence, simply replace the original.
541							# $2: If the unescaped value would be followed by characters
542							# that could be interpreted as html, entity-encode it.
543							# else: The character is safe to leave bare.
544
545							# Neither currently allows $2 to contain '0' so bool tests are sufficient.
546
547	494	100				1247	if( $stash->{encode_amp} ){
548							# Encode & if succeeded by chars that look like an html entity.
549	461					1309	s,$_escape{amp_re}((?:#?[xX]?(?:[0-9a-fA-F]+\|\w+);)?),
550	90	100				354	$1 ? '&'.$2 : $2 ? '&'.$2 : '&',egos;
		100
551							}
552
553	494	100				914	if( $stash->{encode_lt} ){
554							# Encode < if succeeded by chars that look like an html tag.
555							# Leave email addresses (<foo@bar.com>) for Markdown to process.
556	461					1831	s,$_escape{lt_re}((?=$EMAIL_MARKER)\|(?:[a-z/?\$!])?),
557	72	100				299	$1 ? '<'.$2 : $2 ? '<'.$2 : '<',egos;
		100
558							}
559
560	494					816	return $_;
561							}
562
563
564							## Parsing ##
565
566							sub handle_text {
567	543			543	0	5677	my $self = $_[0];
568	543					837	my $stash = $self->_private;
569	543					834	local $_ = $_[1];
570
571							# Unless we're in a code span, verbatim block, or formatted region.
572	543	100				948	unless( $stash->{no_escape} ){
573
574							# We could, in theory, alter what gets escaped according to context
575							# (for example, escape square brackets (but not parens) inside link text).
576							# The markdown produced might look slightly nicer but either way you're
577							# at the whim of the markdown processor to interpret things correctly.
578							# For now just escape everything.
579
580							# Don't let literal characters be interpreted as markdown.
581	460					832	$_ = $self->_escape_inline_markdown($_);
582
583							# Entity-encode (or escape for later processing) necessary/desired chars.
584	460					790	$_ = $self->_encode_or_escape_entities($_);
585
586							}
587							# If this _is_ a code section, do limited/specific handling.
588							else {
589							# Always escaping these chars ensures that we won't mangle the text
590							# in the unlikely event that a sequence matching our escape occurred in the
591							# input stream (since we're going to escape it and then unescape it).
592	83	100				273	s/&/$_escape{amp_code}/gos if $stash->{encode_amp};
593	83	100				252	s/</$_escape{lt_code}/gos if $stash->{encode_lt};
594							}
595
596	543					1064	$self->_save($_);
597							}
598
599							sub start_Document {
600	290			290	0	87147	my ($self) = @_;
601	290					569	$self->_new_stack;
602							}
603
604							sub end_Document {
605	290			290	0	18270	my ($self) = @_;
606	290					677	$self->_check_search_header;
607	290					350	my $end = pop @{ $self->_private->{stacks} };
	290					386
608
609	290	50				328	@{ $self->_private->{stacks} } == 0
	290					466
610							or die 'Document ended with stacks remaining';
611
612	290					831	my @doc = $self->_chomp_all(join('', @$end)) . $/;
613
614	290	100				689	if( $self->include_meta_tags ){
615	6					37	unshift @doc, $self->_build_markdown_head, ($/ x 2);
616							}
617
618	290	100				1768	if( my $encoding = $self->_get_output_encoding ){
619							# Do the check outside the loop(s) for efficiency.
620	55	100				392	my $ents = $HAS_HTML_ENTITIES ? \&__entity_encode_ord_he : \&__entity_encode_ord_basic;
621							# Iterate indices to avoid copying large strings.
622	55					129	for my $i ( 0 .. $#doc ){
623	55					99	print { $self->{output_fh} } Encode::encode($encoding, $doc[$i], $ents);
	55					143
624							}
625							}
626							else {
627	235					1033	print { $self->{output_fh} } @doc;
	235					784
628							}
629							}
630
631							sub _get_output_encoding {
632	290			290		405	my ($self) = @_;
633
634							# If 'match_encoding' is set we need to return an encoding.
635							# If pod has no =encoding, Pod::Simple will guess if it sees a high-bit char.
636							# If there are no high-bit chars, encoding is undef.
637							# Use detected_encoding() rather than encoding() because if Pod::Simple
638							# can't use whatever encoding was specified, we probably can't either.
639							# Fallback to 'o_e' if no match is found. This gives the user the choice,
640							# since otherwise there would be no reason to specify 'o_e' and 'm_e'.
641							# Fallback to UTF-8 since it is a reasonable default these days.
642
643	290	100	100			531	return $self->detected_encoding \|\| $self->output_encoding \|\| 'UTF-8'
644							if $self->match_encoding;
645
646							# If output encoding wasn't specified, return false.
647	278					1353	return $self->output_encoding;
648							}
649
650							## Blocks ##
651
652							sub start_Verbatim {
653	17			17	0	4056	my ($self) = @_;
654	17					54	$self->_new_stack;
655	17					32	$self->_private->{no_escape} = 1;
656							}
657
658							sub end_Verbatim {
659	17			17	0	130	my ($self) = @_;
660
661	17					36	my $text = $self->_pop_stack_text;
662
663	17					54	$text = $self->_indent_verbatim($text);
664
665	17					41	$self->_private->{no_escape} = 0;
666
667							# Verbatim blocks do not generate a separate "Para" event.
668	17					33	$self->_save_block($text);
669							}
670
671							sub _indent_verbatim {
672	17			17		29	my ($self, $paragraph) = @_;
673
674							# NOTE: Pod::Simple expands the tabs for us (as suggested by perlpodspec).
675							# Pod::Simple also has a 'strip_verbatim_indent' attribute
676							# but it doesn't sound like it gains us anything over this method.
677
678							# POD verbatim can start with any number of spaces (or tabs)
679							# markdown should be 4 spaces (or a tab)
680							# so indent any paragraphs so that all lines start with at least 4 spaces
681	17					52	my @lines = split /\n/, $paragraph;
682	17					24	my $indent = ' ' x 4;
683	17					88	foreach my $line ( @lines ){
684	32	100				92	next unless $line =~ m/^( +)/;
685							# find the smallest indentation
686	31	100				186	$indent = $1 if length($1) < length($indent);
687							}
688	17	100				39	if( (my $smallest = length($indent)) < 4 ){
689							# invert to get what needs to be prepended
690	11					24	$indent = ' ' x (4 - $smallest);
691
692							# Prepend indent to each line.
693							# We could check /\S/ to only indent non-blank lines,
694							# but it's backward compatible to respect the whitespace.
695							# Additionally, both pod and markdown say they ignore blank lines
696							# so it shouldn't hurt to leave them in.
697	11	100				20	$paragraph = join "\n", map { length($_) ? $indent . $_ : '' } @lines;
	23					69
698							}
699
700	17					37	return $paragraph;
701							}
702
703							sub start_Para {
704	342			342	0	121983	$_[0]->_new_stack;
705							}
706
707							sub end_Para {
708	342			342	0	2888	my ($self) = @_;
709	342					482	my $text = $self->_pop_stack_text;
710
711	342					633	$text = $self->_escape_paragraph_markdown($text);
712
713	342					575	$self->_save_block($text);
714							}
715
716
717							## Headings ##
718
719	40			40	0	5069	sub start_head1 { $_[0]->_start_head(1) }
720	40			40	0	348	sub end_head1 { $_[0]->_end_head(1) }
721	11			11	0	3823	sub start_head2 { $_[0]->_start_head(2) }
722	11			11	0	104	sub end_head2 { $_[0]->_end_head(2) }
723	1			1	0	334	sub start_head3 { $_[0]->_start_head(3) }
724	1			1	0	18	sub end_head3 { $_[0]->_end_head(3) }
725	0			0	0	0	sub start_head4 { $_[0]->_start_head(4) }
726	0			0	0	0	sub end_head4 { $_[0]->_end_head(4) }
727
728							sub _check_search_header {
729	342			342		597	my ($self) = @_;
730							# Save the text since the last heading if we want it for metadata.
731	342	100				484	if( my $last = $self->_private->{search_header} ){
732	18					27	for( $self->_private->{$last} = $self->_last_string ){
733	18					35	s/\A\s+//;
734	18					81	s/\s+\z//;
735							}
736							}
737							}
738							sub _start_head {
739	52			52		82	my ($self) = @_;
740	52					110	$self->_check_search_header;
741	52					96	$self->_new_stack;
742							}
743
744							sub _end_head {
745	52			52		86	my ($self, $num) = @_;
746	52					89	my $h = '#' x $num;
747
748	52					91	my $text = $self->_pop_stack_text;
749							$self->_private->{search_header} =
750	52	100				185	$text =~ /NAME/ ? 'Title'
		100
751							: $text =~ /AUTHOR/ ? 'Author'
752							: undef;
753
754							# TODO: option for $h suffix
755							# TODO: put a name="" if $self->{embed_anchor_tags}; ?
756							# https://rt.cpan.org/Ticket/Display.html?id=57776
757	52					132	$self->_save_block(join(' ', $h, $text));
758							}
759
760							## Lists ##
761
762							# With Pod::Simple->parse_empty_lists(1) there could be an over_empty event,
763							# but what would you do with that?
764
765							sub _start_list {
766	22			22		36	my ($self) = @_;
767	22					46	$self->_new_stack;
768
769							# Nest again b/c start_item will pop this to look for preceding content.
770	22					47	$self->_increase_indent;
771	22					44	$self->_new_stack;
772							}
773
774							sub _end_list {
775	22			22		38	my ($self) = @_;
776	22					38	$self->_handle_between_item_content;
777
778							# Finish the list.
779
780							# All the child elements should be blocks,
781							# but don't end with a double newline.
782	22					36	my $text = $self->_chomp_all($self->_pop_stack_text);
783
784	22					63	$_[0]->_save_line($text . $/);
785							}
786
787							sub _handle_between_item_content {
788	65			65		311	my ($self) = @_;
789
790							# This might be empty (if the list item had no additional content).
791	65	100				125	if( my $text = $self->_pop_stack_text ){
792							# Else it's a sub-document.
793							# If there are blocks we need to separate with blank lines.
794	21	100				50	if( $self->_private->{last_state}->{blocks} ){
795	16					37	$text = $/ . $text;
796							}
797							# If not, we can condense the text.
798							# In this module's history there was a patch contributed to specifically
799							# produce "huddled" lists so we'll try to maintain that functionality.
800							else {
801	5					12	$text = $self->_chomp_all($text) . $/;
802							}
803	21					37	$self->_save($text)
804							}
805
806	65					110	$self->_decrease_indent;
807							}
808
809							sub _start_item {
810	43			43		59	my ($self) = @_;
811	43					86	$self->_handle_between_item_content;
812	43					69	$self->_new_stack;
813							}
814
815							sub _end_item {
816	43			43		77	my ($self, $marker) = @_;
817	43					87	my $text = $self->_pop_stack_text;
818	43	100	66			209	$self->_save_line($self->_indent($marker .
819							# Add a space only if there is text after the marker.
820							(defined($text) && length($text) ? ' ' . $text : '')
821							));
822
823							# Store any possible contents in a new stack (like a sub-document).
824	43					95	$self->_increase_indent;
825	43					69	$self->_new_stack;
826							}
827
828	7			7	0	1379	sub start_over_bullet { $_[0]->_start_list }
829	7			7	0	529	sub end_over_bullet { $_[0]->_end_list }
830
831	18			18	0	4849	sub start_item_bullet { $_[0]->_start_item }
832	18			18	0	145	sub end_item_bullet { $_[0]->_end_item('-') }
833
834	9			9	0	1895	sub start_over_number { $_[0]->_start_list }
835	9			9	0	1129	sub end_over_number { $_[0]->_end_list }
836
837							sub start_item_number {
838	18			18	0	5463	$_[0]->_start_item;
839							# It seems like this should be a stack,
840							# but from testing it appears that the corresponding 'end' event
841							# comes right after the text (it doesn't surround any embedded content).
842							# See t/nested.t which shows start-item, text, end-item, para, start-item....
843	18					38	$_[0]->_private->{item_number} = $_[1]->{number};
844							}
845
846							sub end_item_number {
847	18			18	0	150	my ($self) = @_;
848	18					26	$self->_end_item($self->_private->{item_number} . '.');
849							}
850
851							# Markdown doesn't support definition lists
852							# so do regular (unordered) lists with indented paragraphs.
853	6			6	0	1267	sub start_over_text { $_[0]->_start_list }
854	6			6	0	617	sub end_over_text { $_[0]->_end_list }
855
856	7			7	0	1759	sub start_item_text { $_[0]->_start_item }
857	7			7	0	59	sub end_item_text { $_[0]->_end_item('-')}
858
859
860							# perlpodspec equates an over/back region with no items to a blockquote.
861							sub start_over_block {
862							# NOTE: We don't actually need to indent for a blockquote.
863	3			3	0	1096	$_[0]->_new_stack;
864							}
865
866							sub end_over_block {
867	3			3	0	491	my ($self) = @_;
868
869							# Chomp first to avoid prefixing a blank line with a `>`.
870	3					7	my $text = $self->_chomp_all($self->_pop_stack_text);
871
872							# NOTE: Paragraphs will already be escaped.
873
874							# I don't really like either of these implementations
875							# but the join/map/split seems a little better and benches a little faster.
876							# You would lose the last newline but we've already chomped.
877							#$text =~ s{^(.)?}{'>' . (defined($1) && length($1) ? (' ' . $1) : '')}mge;
878	3	100				121	$text = join $/, map { length($_) ? '> ' . $_ : '>' } split qr-$/-, $text;
	22					51
879
880	3					13	$self->_save_block($text);
881							}
882
883							## Custom Formats ##
884
885							sub start_for {
886	10			10	0	2769	my ($self, $attr) = @_;
887	10					22	$self->_new_stack;
888
889	10	100				29	if( $attr->{target} eq 'html' ){
890							# Use another stack so we can indent
891							# (not syntactily necessary but seems appropriate).
892	2					80	$self->_new_stack;
893	2					6	$self->_increase_indent;
894	2					3	$self->_private->{no_escape} = 1;
895							# Mark this so we know to undo it.
896	2					5	$self->_stack_state->{for_html} = 1;
897							}
898							}
899
900							sub end_for {
901	10			10	0	972	my ($self) = @_;
902							# Data gets saved as a block (which will handle indents),
903							# but if there was html we'll alter this, so chomp and save a block again.
904	10					20	my $text = $self->_chomp_all($self->_pop_stack_text);
905
906	10	100				23	if( $self->_private->{last_state}->{for_html} ){
907	2					4	$self->_private->{no_escape} = 0;
908							# Save it to the next stack up so we can pop it again (we made two stacks).
909	2					5	$self->_save($text);
910	2					91	$self->_decrease_indent;
911	2					5	$text = join "\n", '<div>', $self->_chomp_all($self->_pop_stack_text), '</div>';
912							}
913
914	10					16	$self->_save_block($text);
915							}
916
917							# Data events will be emitted for any formatted regions that have been enabled
918							# (by default, `markdown` and `html`).
919
920							sub start_Data {
921	5			5	0	372	my ($self) = @_;
922							# TODO: limit this to what's in attr?
923	5					10	$self->_private->{no_escape}++;
924	5					9	$self->_new_stack;
925							}
926
927							sub end_Data {
928	5			5	0	38	my ($self) = @_;
929	5					9	my $text = $self->_pop_stack_text;
930	5					9	$self->_private->{no_escape}--;
931	5					9	$self->_save_block($text);
932							}
933
934							## Codes ##
935
936	94			94	0	707	sub start_B { $_[0]->_save('**') }
937	47			47	0	395	sub end_B { $_[0]->start_B() }
938
939	36			36	0	286	sub start_I { $_[0]->_save('_') }
940	18			18	0	284	sub end_I { $_[0]->start_I() }
941
942							sub start_C {
943	61			61	0	944	my ($self) = @_;
944	61					117	$self->_new_stack;
945	61					92	$self->_private->{no_escape}++;
946							}
947
948							sub end_C {
949	61			61	0	491	my ($self) = @_;
950	61					87	$self->_private->{no_escape}--;
951	61					109	$self->_save( $self->_wrap_code_span($self->_pop_stack_text) );
952							}
953
954							# Use code spans for F<>.
955	4			4	0	56	sub start_F { shift->start_C(@_); }
956	4			4	0	34	sub end_F { shift ->end_C(@_); }
957
958							sub start_L {
959	92			92	0	1241	my ($self, $flags) = @_;
960	92					159	$self->_new_stack;
961	92					116	push @{ $self->_private->{link} }, $flags;
	92					129
962							}
963
964							sub end_L {
965	92			92	0	984	my ($self) = @_;
966	92	50				101	my $flags = pop @{ $self->_private->{link} }
	92					162
967							or die 'Invalid state: link end with no link start';
968
969	92					140	my ($type, $to, $section) = @{$flags}{qw( type to section )};
	92					180
970
971	92	50				268	my $url = (
		100
		100
972							$type eq 'url' ? $to
973							: $type eq 'man' ? $self->format_man_url($to, $section)
974							: $type eq 'pod' ? $self->format_perldoc_url($to, $section)
975							: undef
976							);
977
978	92					151	my $text = $self->_pop_stack_text;
979
980							# NOTE: I don't think the perlpodspec says what to do with L<\|blah>
981							# but it seems like a blank link text just doesn't make sense
982	92	100				174	if( !length($text) ){
983	4	100				9	$text =
		100
984							$section ?
985							$to ? sprintf('"%s" in %s', $section, $to)
986							: ('"' . $section . '"')
987							: $to;
988							}
989
990							# FIXME: What does Pod::Simple::X?HTML do for this?
991							# if we don't know how to handle the url just print the pod back out
992	92	50				236	if (!$url) {
993	0					0	$self->_save(sprintf 'L<%s>', $flags->{raw});
994	0					0	return;
995							}
996
997							# In the url we need to escape quotes and parentheses lest markdown
998							# break the url (cut it short and/or wrongfully interpret a title).
999
1000							# Backslash escapes do not work for the space and quotes.
1001							# URL-encoding the space is not sufficient
1002							# (the quotes confuse some parsers and produce invalid html).
1003							# I've arbitratily chosen HTML encoding to hide them from markdown
1004							# while mangling the url as litle as possible.
1005	92					600	$url =~ s/([ '"])/sprintf '&#x%x;', ord($1)/ge;
	11					50
1006
1007							# We also need to double any backslashes that may be present
1008							# (lest they be swallowed up) and stop parens from breaking the url.
1009	92					401	$url =~ s/([\\()])/\\$1/g;
1010
1011							# TODO: put section name in title if not the same as $text
1012	92					419	$self->_save('[' . $text . '](' . $url . ')');
1013							}
1014
1015							sub start_X {
1016	1			1	0	15	$_[0]->_new_stack;
1017							}
1018
1019							sub end_X {
1020	1			1	0	10	my ($self) = @_;
1021	1					3	my $text = $self->_pop_stack_text;
1022							# TODO: mangle $text?
1023							# TODO: put <a name="$text"> if configured
1024							}
1025
1026							# A code span can be delimited by multiple backticks (and a space)
1027							# similar to pod codes (C<< code >>), so ensure we use a big enough
1028							# delimiter to not have it broken by embedded backticks.
1029							sub _wrap_code_span {
1030	61			61		112	my ($self, $arg) = @_;
1031	61					82	my $longest = 0;
1032	61					161	while( $arg =~ /([`]+)/g ){
1033	5					10	my $len = length($1);
1034	5	100				16	$longest = $len if $longest < $len;
1035							}
1036	61					117	my $delim = '`' x ($longest + 1);
1037	61	100				124	my $pad = $longest > 0 ? ' ' : '';
1038	61					167	return $delim . $pad . $arg . $pad . $delim;
1039							}
1040
1041							## Link Formatting (TODO: Move this to another module) ##
1042
1043
1044							sub format_man_url {
1045	6			6	1	13	my ($self, $to) = @_;
1046	6					12	my ($page, $part) = ($to =~ /^ ([^(]+) (?: $ (\S+) $ )? /x);
1047	6		50			186	return $self->man_url_prefix . ($part \|\| 1) . '/' . ($page \|\| $to);
			33
1048							}
1049
1050
1051							sub format_perldoc_url {
1052	69			69	1	113	my ($self, $name, $section) = @_;
1053
1054	69					218	my $url_prefix = $self->perldoc_url_prefix;
1055	69	100	100			187	if (
			100
1056							defined($name)
1057							&& $self->is_local_module($name)
1058							&& defined($self->local_module_url_prefix)
1059							) {
1060	4					7	$url_prefix = $self->local_module_url_prefix;
1061							}
1062
1063	69					1269	my $url = '';
1064
1065							# If the link is to another module (external link).
1066	69	100				130	if ($name) {
1067	58					1217	$url = $url_prefix . URI::Escape::uri_escape($name);
1068							}
1069
1070							# See https://rt.cpan.org/Ticket/Display.html?id=57776
1071							# for a discussion on the need to mangle the section.
1072	69	100				2081	if ($section){
1073
1074	42	100				1084	my $method = $url
1075							# If we already have a prefix on the url it's external.
1076							? $self->perldoc_fragment_format
1077							# Else an internal link points to this markdown doc.
1078							: $self->markdown_fragment_format;
1079
1080	42	100				251	$method = 'format_fragment_' . $method
1081							unless ref($method);
1082
1083							{
1084							# Set topic to enable code refs to be simple.
1085	42					53	local $_ = $section;
	42					52
1086	42					89	$section = $self->$method($section);
1087							}
1088
1089	42					116	$url .= '#' . $section;
1090							}
1091
1092	69					380	return $url;
1093							}
1094
1095
1096							# TODO: simple, pandoc, etc?
1097
1098							sub format_fragment_markdown {
1099	3			3	1	6	my ($self, $section) = @_;
1100
1101							# If this is an internal link (to another section in this doc)
1102							# we can't be sure what the heading id's will look like
1103							# (it depends on what is rendering the markdown to html)
1104							# but we can try to follow popular conventions.
1105
1106							# http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html#header-identifiers-in-html-latex-and-context
1107							#$section =~ s/(?![-_.])[[:punct:]]//g;
1108							#$section =~ s/\s+/-/g;
1109	3					5	$section =~ s/\W+/-/g;
1110	3					104	$section =~ s/-+$//;
1111	3					4	$section =~ s/^-+//;
1112	3					7	$section = lc $section;
1113							#$section =~ s/^[^a-z]+//;
1114	3		50			7	$section \|\|= 'section';
1115
1116	3					5	return $section;
1117							}
1118
1119
1120							{
1121							# From Pod::Simple::XHTML 3.28.
1122							# The strings gets passed through encode_entities() before idify().
1123							# If we don't do it here the substitutions below won't operate consistently.
1124
1125							sub format_fragment_pod_simple_xhtml {
1126	9			9	1	10	my ($self, $t) = @_;
1127
1128							# encode_entities {
1129							# We need to use the defaults in case html_encode_chars has been customized
1130							# (since the purpose is to match what external sources are doing).
1131
1132	9					15	local $self->_private->{html_encode_chars};
1133	9					19	$t = $self->encode_entities($t);
1134							# }
1135
1136							# idify {
1137	9					282	for ($t) {
1138	9					18	s/<[^>]+>//g; # Strip HTML.
1139	9					79	s/&[^;]+;//g; # Strip entities.
1140	9					74	s/^\s+//; s/\s+$//; # Strip white space.
	9					77
1141	9					129	s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
1142	9					92	s/^[^a-zA-Z]+//; # First char must be a letter.
1143	9					73	s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
1144	9					119	s/[-:.]+$//; # Strip trailing punctuation.
1145							}
1146							# }
1147
1148	9					41	return $t;
1149							}
1150							}
1151
1152
1153							sub format_fragment_pod_simple_html {
1154	9			9	1	14	my ($self, $section) = @_;
1155
1156							# From Pod::Simple::HTML 3.28.
1157
1158							# section_name_tidy {
1159	9					15	$section =~ s/^\s+//;
1160	9					128	$section =~ s/\s+$//;
1161	9					160	$section =~ tr/ /_/;
1162	9					224	$section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
1163
1164							#$section = $self->unicode_escape_url($section);
1165							# unicode_escape_url {
1166	9					17	$section =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
	0					0
1167							# Turn char 1234 into "(1234)"
1168							# }
1169
1170	9	50				15	$section = '_' unless length $section;
1171	9					19	return $section;
1172							# }
1173							}
1174
1175
1176	9			9	1	19	sub format_fragment_metacpan { shift->format_fragment_pod_simple_xhtml(@_); }
1177	9			9	1	19	sub format_fragment_sco { shift->format_fragment_pod_simple_html(@_); }
1178
1179
1180							sub is_local_module {
1181	58			58	1	79	my ($self, $name) = @_;
1182
1183	58					159	return ($name =~ $self->local_module_re);
1184							}
1185
1186							1;
1187
1188							__END__
1189
1190							=pod
1191
1192							=encoding UTF-8
1193
1194							=for :stopwords Marcel Gruenauer Victor Moral Ryan C. Thompson <rct at thompsonclan d0t
1195							org> Aristotle Pagaltzis Randy Stauner ACKNOWLEDGEMENTS html cpan
1196							testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto
1197							metadata placeholders metacpan
1198
1199							=head1 NAME
1200
1201							Pod::Markdown - Convert POD to Markdown
1202
1203							=head1 VERSION
1204
1205							version 3.200
1206
1207							=for test_synopsis my ($pod_string);
1208
1209							=head1 SYNOPSIS
1210
1211							# Pod::Simple API is supported.
1212
1213							# Command line usage: Parse a pod file and print to STDOUT:
1214							# $ perl -MPod::Markdown -e 'Pod::Markdown->new->filter(@ARGV)' path/to/POD/file > README.md
1215
1216							# Work with strings:
1217							my $markdown;
1218							my $parser = Pod::Markdown->new;
1219							$parser->output_string(\$markdown);
1220							$parser->parse_string_document($pod_string);
1221
1222							# See Pod::Simple docs for more.
1223
1224							=head1 DESCRIPTION
1225
1226							This module uses L<Pod::Simple> to convert POD to Markdown.
1227
1228							Literal characters in Pod that are special in Markdown
1229							(like asterisks) are backslash-escaped when appropriate.
1230
1231							By default C<markdown> and C<html> formatted regions are accepted.
1232							Regions of C<markdown> will be passed through unchanged.
1233							Regions of C<html> will be placed inside a C<< E<lt>divE<gt> >> tag
1234							so that markdown characters won't be processed.
1235							Regions of C<:markdown> or C<:html> will be processed as POD and included.
1236							To change which regions are accepted use the L<Pod::Simple> API:
1237
1238							my $parser = Pod::Markdown->new;
1239							$parser->unaccept_targets(qw( markdown html ));
1240
1241							=head2 A note on encoding and escaping
1242
1243							The common L<Pod::Simple> API returns a character string.
1244							If you want Pod::Markdown to return encoded octets, there are two attributes
1245							to assist: L</match_encoding> and L</output_encoding>.
1246
1247							When an output encoding is requested any characters that are not valid
1248							for that encoding will be escaped as HTML entities.
1249
1250							This is not 100% safe, however.
1251
1252							Markdown escapes all ampersands inside of code spans, so escaping a character
1253							as an HTML entity inside of a code span will not be correct.
1254							However, with pod's C<S> and C<E> sequences it is possible
1255							to end up with high-bit characters inside of code spans.
1256
1257							So, while C<< output_encoding => 'ascii' >> can work, it is not recommended.
1258							For these reasons (and more), C<UTF-8> is the default, fallback encoding (when one is required).
1259
1260							If you prefer HTML entities over literal characters you can use
1261							L</html_encode_chars> which will only operate outside of code spans (where it is safe).
1262
1263							=head1 METHODS
1264
1265							=head2 new
1266
1267							Pod::Markdown->new(%options);
1268
1269							The constructor accepts the following named arguments:
1270
1271							=over 4
1272
1273							=item *
1274
1275							C<local_module_url_prefix>
1276
1277							Alters the perldoc urls that are created from C<< LE<lt>E<gt> >> codes
1278							when the module is a "local" module (C<"Local::"> or C<"Foo_Corp::"> (see L<perlmodlib>)).
1279
1280							The default is to use C<perldoc_url_prefix>.
1281
1282							=item *
1283
1284							C<local_module_re>
1285
1286							Alternate regular expression for determining "local" modules.
1287							Default is C<< our $LOCAL_MODULE_RE = qr/^(Local::\|\w?_\w)/ >>.
1288
1289							=item *
1290
1291							C<man_url_prefix>
1292
1293							Alters the man page urls that are created from C<< LE<lt>E<gt> >> codes.
1294
1295							The default is C<http://man.he.net/man>.
1296
1297							=item *
1298
1299							C<perldoc_url_prefix>
1300
1301							Alters the perldoc urls that are created from C<< LE<lt>E<gt> >> codes.
1302							Can be:
1303
1304							=over 4
1305
1306							=item *
1307
1308							C<metacpan> (shortcut for C<https://metacpan.org/pod/>)
1309
1310							=item *
1311
1312							C<sco> (shortcut for C<http://search.cpan.org/perldoc?>)
1313
1314							=item *
1315
1316							any url
1317
1318							=back
1319
1320							The default is C<metacpan>.
1321
1322							Pod::Markdown->new(perldoc_url_prefix => 'http://localhost/perl/pod');
1323
1324							=item *
1325
1326							C<perldoc_fragment_format>
1327
1328							Alters the format of the url fragment for any C<< LE<lt>E<gt> >> links
1329							that point to a section of an external document (C<< L<name/section> >>).
1330							The default will be chosen according to the destination L</perldoc_url_prefix>.
1331							Alternatively you can specify one of the following:
1332
1333							=over 4
1334
1335							=item *
1336
1337							C<metacpan>
1338
1339							=item *
1340
1341							C<sco>
1342
1343							=item *
1344
1345							C<pod_simple_xhtml>
1346
1347							=item *
1348
1349							C<pod_simple_html>
1350
1351							=item *
1352
1353							A code ref
1354
1355							=back
1356
1357							The code ref can expect to receive two arguments:
1358							the parser object (C<$self>) and the section text.
1359							For convenience the topic variable (C<$_>) is also set to the section text:
1360
1361							perldoc_fragment_format => sub { s/\W+/-/g; }
1362
1363							=item *
1364
1365							C<markdown_fragment_format>
1366
1367							Alters the format of the url fragment for any C<< LE<lt>E<gt> >> links
1368							that point to an internal section of this document (C<< L</section> >>).
1369
1370							Unfortunately the format of the id attributes produced
1371							by whatever system translates the markdown into html is unknown at the time
1372							the markdown is generated so we do some simple clean up.
1373
1374							B<Note:> C<markdown_fragment_format> and C<perldoc_fragment_format> accept
1375							the same values: a (shortcut to a) method name or a code ref.
1376
1377							=item *
1378
1379							C<include_meta_tags>
1380
1381							Specifies whether or not to print author/title meta tags at the top of the document.
1382							Default is false.
1383
1384							=back
1385
1386							=head2 html_encode_chars
1387
1388							A string of characters to encode as html entities
1389							(using L<HTML::Entities/encode_entities> if available, falling back to numeric entities if not).
1390
1391							Possible values:
1392
1393							=over 4
1394
1395							=item *
1396
1397							A value of C<1> will use the default set of characters from L<HTML::Entities> (control chars, high-bit chars, and C<< <&>"' >>).
1398
1399							=item *
1400
1401							A false value will disable.
1402
1403							=item *
1404
1405							Any other value is used as a string of characters (like a regular expression character class).
1406
1407							=back
1408
1409							By default this is disabled and literal characters will be in the output stream.
1410							If you specify a desired L</output_encoding> any characters not valid for that encoding will be HTML entity encoded.
1411
1412							B<Note> that Markdown requires ampersands (C<< & >>) and left angle brackets (C<< < >>)
1413							to be entity-encoded if they could otherwise be interpreted as html entities.
1414							If this attribute is configured to encode those characters, they will always be encoded.
1415							If not, the module will make an effort to only encode the ones required,
1416							so there will be less html noise in the output.
1417
1418							=head2 match_encoding
1419
1420							Boolean: If true, use the C<< =encoding >> of the input pod
1421							as the encoding for the output.
1422
1423							If no encoding is specified, L<Pod::Simple> will guess the encoding
1424							if it sees a high-bit character.
1425
1426							If no encoding is guessed (or the specified encoding is unusable),
1427							L</output_encoding> will be used if it was specified.
1428							Otherwise C<UTF-8> will be used.
1429
1430							This attribute is not recommended
1431							but is provided for consistency with other pod converters.
1432
1433							Defaults to false.
1434
1435							=head2 output_encoding
1436
1437							The encoding to use when writing to the output file handle.
1438
1439							If neither this nor L</match_encoding> are specified,
1440							a character string will be returned in whatever L<Pod::Simple> output method you specified.
1441
1442							=head2 local_module_re
1443
1444							Returns the regular expression used to determine local modules.
1445
1446							=head2 local_module_url_prefix
1447
1448							Returns the url prefix in use for local modules.
1449
1450							=head2 man_url_prefix
1451
1452							Returns the url prefix in use for man pages.
1453
1454							=head2 perldoc_url_prefix
1455
1456							Returns the url prefix in use (after resolving shortcuts to urls).
1457
1458							=head2 perldoc_fragment_format
1459
1460							Returns the coderef or format name used to format a url fragment
1461							to a section in an external document.
1462
1463							=head2 markdown_fragment_format
1464
1465							Returns the coderef or format name used to format a url fragment
1466							to an internal section in this document.
1467
1468							=head2 include_meta_tags
1469
1470							Returns the boolean value indicating
1471							whether or not meta tags will be printed.
1472
1473							=head2 format_man_url
1474
1475							Used internally to create a url (using L</man_url_prefix>)
1476							from a string like C<man(1)>.
1477
1478							=head2 format_perldoc_url
1479
1480							# With $name and section being the two parts of L<name/section>.
1481							my $url = $parser->format_perldoc_url($name, $section);
1482
1483							Used internally to create a url from
1484							the name (of a module or script)
1485							and a possible section (heading).
1486
1487							The format of the url fragment (when pointing to a section in a document)
1488							varies depending on the destination url
1489							so L</perldoc_fragment_format> is used (which can be customized).
1490
1491							If the module name portion of the link is blank
1492							then the section is treated as an internal fragment link
1493							(to a section of the generated markdown document)
1494							and L</markdown_fragment_format> is used (which can be customized).
1495
1496							=head2 format_fragment_markdown
1497
1498							Format url fragment for an internal link
1499							by replacing non-word characters with dashes.
1500
1501							=head2 format_fragment_pod_simple_xhtml
1502
1503							Format url fragment like L<Pod::Simple::XHTML/idify>.
1504
1505							=head2 format_fragment_pod_simple_html
1506
1507							Format url fragment like L<Pod::Simple::HTML/section_name_tidy>.
1508
1509							=head2 format_fragment_metacpan
1510
1511							Format fragment for L<metacpan.org>
1512							(uses L</format_fragment_pod_simple_xhtml>).
1513
1514							=head2 format_fragment_sco
1515
1516							Format fragment for L<search.cpan.org>
1517							(uses L</format_fragment_pod_simple_html>).
1518
1519							=head2 is_local_module
1520
1521							Uses C<local_module_re> to determine if passed module is a "local" module.
1522
1523							=for Pod::Coverage parse_from_file
1524							parse_from_filehandle
1525
1526							=for Pod::Coverage as_markdown
1527
1528							=for Pod::Coverage handle_text
1529							end_.+
1530							start_.+
1531							encode_entities
1532
1533							=head1 SEE ALSO
1534
1535							=over 4
1536
1537							=item *
1538
1539							L<pod2markdown> - script included for command line usage
1540
1541							=item *
1542
1543							L<Pod::Simple> - Super class that handles Pod parsing
1544
1545							=item *
1546
1547							L<perlpod> - For writing POD
1548
1549							=item *
1550
1551							L<perlpodspec> - For parsing POD
1552
1553							=item *
1554
1555							L<http://daringfireball.net/projects/markdown/syntax> - Markdown spec
1556
1557							=back
1558
1559							=head1 SUPPORT
1560
1561							=head2 Perldoc
1562
1563							You can find documentation for this module with the perldoc command.
1564
1565							perldoc Pod::Markdown
1566
1567							=head2 Websites
1568
1569							The following websites have more information about this module, and may be of help to you. As always,
1570							in addition to those websites please use your favorite search engine to discover more resources.
1571
1572							=over 4
1573
1574							=item *
1575
1576							MetaCPAN
1577
1578							A modern, open-source CPAN search engine, useful to view POD in HTML format.
1579
1580							L<https://metacpan.org/release/Pod-Markdown>
1581
1582							=back
1583
1584							=head2 Bugs / Feature Requests
1585
1586							Please report any bugs or feature requests by email to C<bug-pod-markdown at rt.cpan.org>, or through
1587							the web interface at L<https://rt.cpan.org/Public/Bug/Report.html?Queue=Pod-Markdown>. You will be automatically notified of any
1588							progress on the request by the system.
1589
1590							=head2 Source Code
1591
1592
1593							L<https://github.com/rwstauner/Pod-Markdown>
1594
1595							git clone https://github.com/rwstauner/Pod-Markdown.git
1596
1597							=head1 AUTHORS
1598
1599							=over 4
1600
1601							=item *
1602
1603							Marcel Gruenauer <marcel@cpan.org>
1604
1605							=item *
1606
1607							Victor Moral <victor@taquiones.net>
1608
1609							=item *
1610
1611							Ryan C. Thompson <rct at thompsonclan d0t org>
1612
1613							=item *
1614
1615							Aristotle Pagaltzis <pagaltzis@gmx.de>
1616
1617							=item *
1618
1619							Randy Stauner <rwstauner@cpan.org>
1620
1621							=back
1622
1623							=head1 CONTRIBUTORS
1624
1625							=for stopwords Aristotle Pagaltzis Cindy Wang (CindyLinz) Graham Ollis Mike Covington motemen moznion Peter Vereshagin Ryan C. Thompson Yasutaka ATARASHI
1626
1627							=over 4
1628
1629							=item *
1630
1631							Aristotle Pagaltzis <aristotle@cpan.org>
1632
1633							=item *
1634
1635							Cindy Wang (CindyLinz) <cindylinz@gmail.com>
1636
1637							=item *
1638
1639							Graham Ollis <plicease@cpan.org>
1640
1641							=item *
1642
1643							Mike Covington <mfcovington@gmail.com>
1644
1645							=item *
1646
1647							motemen <motemen@cpan.org>
1648
1649							=item *
1650
1651							moznion <moznion@cpan.org>
1652
1653							=item *
1654
1655							Peter Vereshagin <veresc@cpan.org>
1656
1657							=item *
1658
1659							Ryan C. Thompson <rthompson@cpan.org>
1660
1661							=item *
1662
1663							Yasutaka ATARASHI <yakex@cpan.org>
1664
1665							=back
1666
1667							=head1 COPYRIGHT AND LICENSE
1668
1669							This software is copyright (c) 2011 by Randy Stauner.
1670
1671							This is free software; you can redistribute it and/or modify it under
1672							the same terms as the Perl 5 programming language system itself.
1673
1674							=cut