File Coverage

blib/lib/Pod/Markdown.pm

Criterion	Covered	Total	%
statement	387	395	97.9
branch	118	128	92.1
condition	22	34	64.7
subroutine	98	100	98.0
pod	11	59	18.6
total	636	716	88.8

line	stmt	bran	cond	sub	pod	time	code
1							# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
2							#
3							# This file is part of Pod-Markdown
4							#
5							# This software is copyright (c) 2011 by Randy Stauner.
6							#
7							# This is free software; you can redistribute it and/or modify it under
8							# the same terms as the Perl 5 programming language system itself.
9							#
10	16			16		1213850	use 5.008;
	16					227
11	16			16		100	use strict;
	16					28
	16					359
12	16			16		102	use warnings;
	16					26
	16					1290
13
14							package Pod::Markdown;
15							# git description: v3.100-2-g1399a61
16
17							our $AUTHORITY = 'cpan:RWSTAUNER';
18							# ABSTRACT: Convert POD to Markdown
19							$Pod::Markdown::VERSION = '3.101';
20	16			16		8817	use Pod::Simple 3.27 (); # detected_encoding and keep_encoding bug fix
	16					426825
	16					533
21	16			16		7067	use parent qw(Pod::Simple::Methody);
	16					4028
	16					93
22	16			16		19659	use Encode ();
	16					137742
	16					1948
23
24							our %URL_PREFIXES = (
25							sco => 'http://search.cpan.org/perldoc?',
26							metacpan => 'https://metacpan.org/pod/',
27							man => 'http://man.he.net/man',
28							);
29							$URL_PREFIXES{perldoc} = $URL_PREFIXES{metacpan};
30
31							our $LOCAL_MODULE_RE = qr/^(Local::\|\w?_\w)/;
32
33							## no critic
34							#{
35							our $HAS_HTML_ENTITIES;
36
37							# Stolen from Pod::Simple::XHTML 3.28. {{{
38
39							BEGIN {
40	16			16		894	$HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
41							}
42
43							my %entities = (
44							q{>} => 'gt',
45							q{<} => 'lt',
46							q{'} => '#39',
47							q{"} => 'quot',
48							q{&} => 'amp',
49							);
50
51							sub encode_entities {
52	234			234	0	384	my $self = shift;
53	234					359	my $ents = $self->html_encode_chars;
54	234	100				549	return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
55	49	100				75	if (defined $ents) {
56	48					84	$ents =~ s,(?
57	48					72	$ents =~ s,(?
58							} else {
59	1					5	$ents = join '', keys %entities;
60							}
61	49					66	my $str = $_[0];
62	49		66			227	$str =~ s/([$ents])/'&' . ($entities{$1} \|\| sprintf '#x%X', ord $1) . ';'/ge;
	23					146
63	49					152	return $str;
64							}
65
66							# }}}
67
68							# Add a few very common ones for consistency and readability
69							# (in case HTML::Entities isn't available).
70							%entities = (
71							# Pod::Markdown has always required 5.8 so unicode_to_native will be available.
72							chr(utf8::unicode_to_native(0xA0)) => 'nbsp',
73							chr(utf8::unicode_to_native(0xA9)) => 'copy',
74							%entities
75							);
76
77							sub __entity_encode_ord_he {
78	11			11		251	my $chr = chr $_[0];
79							# Skip the encode_entities() logic and go straight for the substitution
80							# since we already have the char we know we want replaced.
81							# Both the hash and the function are documented as exportable (so should be reliable).
82	11		33			54	return $HTML::Entities::char2entity{ $chr } \|\| HTML::Entities::num_entity( $chr );
83							}
84							sub __entity_encode_ord_basic {
85	7		33	7		241	return '&' . ($entities{chr $_[0]} \|\| sprintf '#x%X', $_[0]) . ';';
86							}
87
88							# From HTML::Entities 3.69
89							my $DEFAULT_ENTITY_CHARS = '^\n\r\t !\#\$%\(-;=?-~';
90
91							#}
92							## use critic
93
94							# Use hash for simple "exists" check in `new` (much more accurate than `->can`).
95							my %attributes = map { ($_ => 1) }
96							qw(
97							html_encode_chars
98							match_encoding
99							output_encoding
100							local_module_re
101							local_module_url_prefix
102							man_url_prefix
103							perldoc_url_prefix
104							perldoc_fragment_format
105							markdown_fragment_format
106							include_meta_tags
107							);
108
109
110							sub new {
111	302			302	1	173799	my $class = shift;
112	302					807	my %args = @_;
113
114	302					946	my $self = $class->SUPER::new();
115	302					5005	$self->preserve_whitespace(1);
116	302					2075	$self->nbsp_for_S(1);
117	302					1613	$self->accept_targets(qw( markdown html ));
118
119							# Default to the global, but allow it to be overwritten in args.
120	302					4904	$self->local_module_re($LOCAL_MODULE_RE);
121
122	302					1426	for my $type ( qw( perldoc man ) ){
123	604					975	my $attr = $type . '_url_prefix';
124							# Initialize to the alias.
125	604					1278	$self->$attr($type);
126							}
127
128	302					877	while( my ($attr, $val) = each %args ){
129							# NOTE: Checking exists on a private var means we don't allow Pod::Simple
130							# attributes to be set this way. It's not very consistent, but I think
131							# I'm ok with that for now since there probably aren't many Pod::Simple attributes
132							# being changed besides `output_*` which feel like API rather than attributes.
133							# We'll see.
134							# This is currently backward-compatible as we previously just put the attribute
135							# into the private stash so anything unknown was silently ignored.
136							# We could open this up to `$self->can($attr)` in the future if that seems better
137							# but it tricked me when I was testing a misspelled attribute name
138							# which also happened to be a Pod::Simple method.
139
140	905	100				3231	exists $attributes{ $attr } or
141							# Provide a more descriptive message than "Can't locate object method".
142							warn("Unknown argument to ${class}->new(): '$attr'"), next;
143
144							# Call setter.
145	903					1662	$self->$attr($val);
146							}
147
148							# TODO: call from the setters.
149	302					1501	$self->_prepare_fragment_formats;
150
151	302					779	return $self;
152							}
153
154							for my $type ( qw( local_module perldoc man ) ){
155							my $attr = $type . '_url_prefix';
156	16			16		128	no strict 'refs'; ## no critic
	16					33
	16					85790
157							*$attr = sub {
158	987			987		12581	my $self = shift;
159	987	100				1503	if (@_) {
160	844		66			3074	$self->{$attr} = $URL_PREFIXES{ $_[0] } \|\| $_[0];
161							}
162							else {
163	143					411	return $self->{$attr};
164							}
165							}
166							}
167
168							## Attribute accessors ##
169
170
171							sub html_encode_chars {
172	305			305	1	326	my $self = shift;
173	305					412	my $stash = $self->_private;
174
175							# Setter.
176	305	100				517	if( @_ ){
177							# If false ('', 0, undef), disable.
178	71	50				124	if( !$_[0] ){
179	0					0	delete $stash->{html_encode_chars};
180	0					0	$stash->{encode_amp} = 1;
181	0					0	$stash->{encode_lt} = 1;
182							}
183							else {
184							# Special case boolean '1' to mean "all".
185							# If we have HTML::Entities, undef will use the default.
186							# Without it, we need to specify so that we use the same list (for consistency).
187	71	100				172	$stash->{html_encode_chars} = $_[0] eq '1' ? ($HAS_HTML_ENTITIES ? undef : $DEFAULT_ENTITY_CHARS) : $_[0];
		100
188
189							# If [char] doesn't get encoded, we need to do it ourselves.
190	71					139	$stash->{encode_amp} = ($self->encode_entities('&') eq '&');
191	71					2363	$stash->{encode_lt} = ($self->encode_entities('<') eq '<');
192							}
193	71					1539	return;
194							}
195
196							# Getter.
197	234					354	return $stash->{html_encode_chars};
198							}
199
200
201							# I prefer ro-accessors (immutability!) but it can be confusing
202							# to not support the same API as other Pod::Simple classes.
203
204							# NOTE: Pod::Simple::_accessorize is not a documented public API.
205							# Skip any that have already been defined.
206							__PACKAGE__->_accessorize(grep { !__PACKAGE__->can($_) } keys %attributes);
207
208							sub _prepare_fragment_formats {
209	302			302		414	my ($self) = @_;
210
211	302					856	foreach my $attr ( keys %attributes ){
212	3020	100				6272	next unless $attr =~ /^(\w+)_fragment_format/;
213	604					1105	my $type = $1;
214	604					1200	my $format = $self->$attr;
215
216							# If one was provided.
217	604	100				2949	if( $format ){
218							# If the attribute is a coderef just use it.
219	510	100				1108	next if ref($format) eq 'CODE';
220							}
221							# Else determine a default.
222							else {
223	94	100				170	if( $type eq 'perldoc' ){
224							# Choose a default that matches the destination url.
225	46					75	my $target = $self->perldoc_url_prefix;
226	46					117	foreach my $alias ( qw( metacpan sco ) ){
227	92	100				197	if( $target eq $URL_PREFIXES{ $alias } ){
228	37					53	$format = $alias;
229							}
230							}
231							# This seems like a reasonable fallback.
232	46		100			94	$format \|\|= 'pod_simple_xhtml';
233							}
234							else {
235	48					66	$format = $type;
236							}
237							}
238
239							# The short name should become a method name with the prefix prepended.
240	140					200	my $prefix = 'format_fragment_';
241	140					437	$format =~ s/^$prefix//;
242	140	50				509	die "Unknown fragment format '$format'"
243							unless $self->can($prefix . $format);
244
245							# Save it.
246	140					329	$self->$attr($format);
247							}
248
249	302					640	return;
250							}
251
252							## Backward compatible API ##
253
254							# For backward compatibility (previously based on Pod::Parser):
255							# While Pod::Simple provides a parse_from_file() method
256							# it's primarily for Pod::Parser compatibility.
257							# When called without an output handle it will print to STDOUT
258							# but the old Pod::Markdown never printed to a handle
259							# so we don't want to start now.
260							sub parse_from_file {
261	10			10	1	17	my ($self, $file) = @_;
262
263							# TODO: Check that all dependent cpan modules use the Pod::Simple API
264							# then add a deprecation warning here to avoid confusion.
265
266	10					38	$self->output_string(\($self->{_as_markdown_}));
267	10					2019	$self->parse_file($file);
268							}
269
270							# Likewise, though Pod::Simple doesn't define this method at all.
271	9			9	0	344	sub parse_from_filehandle { shift->parse_from_file(@_) }
272
273
274							## Document state ##
275
276							sub _private {
277	9761			9761		11450	my ($self) = @_;
278							$self->{_Pod_Markdown_} \|\|= {
279	9761		100			25648	indent => 0,
280							stacks => [],
281							states => [{}],
282							link => [],
283							encode_amp => 1,
284							encode_lt => 1,
285							};
286							}
287
288							sub _increase_indent {
289	67	50		67		108	++$_[0]->_private->{indent} >= 1
290							or die 'Invalid state: indent < 0';
291							}
292							sub _decrease_indent {
293	67	50		67		88	--$_[0]->_private->{indent} >= 0
294							or die 'Invalid state: indent < 0';
295							}
296
297							sub _new_stack {
298	1002			1002		1141	push @{ $_[0]->_private->{stacks} }, [];
	1002					1524
299	1002					1224	push @{ $_[0]->_private->{states} }, {};
	1002					1374
300							}
301
302							sub _last_string {
303	18			18		25	$_[0]->_private->{stacks}->[-1][-1];
304							}
305
306							sub _pop_stack_text {
307	713			713		789	$_[0]->_private->{last_state} = pop @{ $_[0]->_private->{states} };
	713					982
308	713					878	join '', @{ pop @{ $_[0]->_private->{stacks} } };
	713					734
	713					920
309							}
310
311							sub _stack_state {
312	430			430		604	$_[0]->_private->{states}->[-1];
313							}
314
315							sub _save {
316	1340			1340		2176	my ($self, $text) = @_;
317	1340					1423	push @{ $self->_private->{stacks}->[-1] }, $text;
	1340					1737
318							# return $text; # DEBUG
319							}
320
321							sub _save_line {
322	493			493		727	my ($self, $text) = @_;
323
324	493					769	$text = $self->_process_escapes($text);
325
326	493					1211	$self->_save($text . $/);
327							}
328
329							# For paragraphs, etc.
330							sub _save_block {
331	428			428		630	my ($self, $text) = @_;
332
333	428					651	$self->_stack_state->{blocks}++;
334
335	428					784	$self->_save_line($self->_indent($text) . $/);
336							}
337
338							## Formatting ##
339
340							sub _chomp_all {
341	331			331		518	my ($self, $text) = @_;
342	331					1027	1 while chomp $text;
343	331					791	return $text;
344							}
345
346							sub _indent {
347	471			471		633	my ($self, $text) = @_;
348	471					627	my $level = $self->_private->{indent};
349
350	471	100				775	if( $level ){
351	34					70	my $indent = ' ' x ($level * 4);
352
353							# Capture text on the line so that we don't indent blank lines (/^\x20{4}$/).
354	34					183	$text =~ s/^(.+)/$indent$1/mg;
355							}
356
357	471					1343	return $text;
358							}
359
360							# as_markdown() exists solely for backward compatibility
361							# and requires having called parse_from_file() to be useful.
362
363
364							sub as_markdown {
365	10			10	0	276	my ($parser, %args) = @_;
366	10					11	my @header;
367							# Don't add meta tags again if we've already done it.
368	10	100	100			30	if( $args{with_meta} && !$parser->include_meta_tags ){
369	3					21	@header = $parser->_build_markdown_head;
370							}
371	10					47	return join("\n" x 2, @header, $parser->{_as_markdown_});
372							}
373
374							sub _build_markdown_head {
375	9			9		12	my $parser = shift;
376	9					10	my $data = $parser->_private;
377							return join "\n",
378	12					55	map { qq![[meta \l$_="$data->{$_}"]]! }
379	9					17	grep { defined $data->{$_} }
	18					38
380							qw( Title Author );
381							}
382
383							## Escaping ##
384
385							# http://daringfireball.net/projects/markdown/syntax#backslash
386							# Markdown provides backslash escapes for the following characters:
387							#
388							# \ backslash
389							# ` backtick
390							# * asterisk
391							# _ underscore
392							# {} curly braces
393							# [] square brackets
394							# () parentheses
395							# # hash mark
396							# + plus sign
397							# - minus sign (hyphen)
398							# . dot
399							# ! exclamation mark
400
401							# However some of those only need to be escaped in certain places:
402							# * Backslashes do need to be escaped or they may be swallowed by markdown.
403							# * Word-surrounding characters (/[`_]/) do* need to be escaped mid-word
404							# because the markdown spec explicitly allows mid-word emphasis.
405							# * I don't actually see anything that curly braces are used for.
406							# * Escaping square brackets is enough to avoid accidentally
407							# creating links and images (so we don't need to escape plain parentheses
408							# or exclamation points as that would generate a lot of unnecesary noise).
409							# Parentheses will be escaped in urls (&end_L) to avoid premature termination.
410							# * We don't need a backslash for every hash mark or every hyphen found mid-word,
411							# just the ones that start a line (likewise for plus and dot).
412							# (Those will all be handled by _escape_paragraph_markdown).
413
414
415							# Backslash escape markdown characters to avoid having them interpreted.
416							sub _escape_inline_markdown {
417	459			459		598	local $_ = $_[1];
418
419							# s/([\\`*_{}\[\]()#+-.!])/\\$1/g; # See comments above.
420	459					921	s/([\\`*_\[\]])/\\$1/g;
421
422	459					805	return $_;
423							}
424
425							# Escape markdown characters that would be interpreted
426							# at the start of a line.
427							sub _escape_paragraph_markdown {
428	341			341		471	local $_ = $_[1];
429
430							# Escape headings, horizontal rules, (unordered) lists, and blockquotes.
431	341					723	s/^([-+#>])/\\$1/mg;
432
433							# Markdown doesn't support backslash escapes for equal signs
434							# even though they can be used to underline a header.
435							# So use html to escape them to avoid having them interpreted.
436	341					493	s/^([=])/sprintf '&#x%x;', ord($1)/mge;
	1					7
437
438							# Escape the dots that would wrongfully create numbered lists.
439	341					450	s/^( (?:>\s+)? \d+ ) (\.\x20)/$1\\$2/xgm;
440
441	341					517	return $_;
442							}
443
444
445							# Additionally Markdown allows inline html so we need to escape things that look like it.
446							# While _some_ Markdown processors handle backslash-escaped html,
447							# [Daring Fireball](http://daringfireball.net/projects/markdown/syntax) states distinctly:
448							# > In HTML, there are two characters that demand special treatment: < and &...
449							# > If you want to use them as literal characters, you must escape them as entities, e.g. <, and &.
450
451							# It goes on to say:
452							# > Markdown allows you to use these characters naturally,
453							# > taking care of all the necessary escaping for you.
454							# > If you use an ampersand as part of an HTML entity,
455							# > it remains unchanged; otherwise it will be translated into &.
456							# > Similarly, because Markdown supports inline HTML,
457							# > if you use angle brackets as delimiters for HTML tags, Markdown will treat them as such.
458
459							# In order to only encode the occurrences that require it (something that
460							# could be interpreted as an entity) we escape them all so that we can do the
461							# suffix test later after the string is complete (since we don't know what
462							# strings might come after this one).
463
464							my %_escape =
465							map {
466							my ($k, $v) = split /:/;
467							# Put the "code" marker before the char instead of after so that it doesn't
468							# get confused as the $2 (which is what requires us to entity-encode it).
469							# ( "XsX", "XcsX", "X(c?)sX" )
470							my ($s, $code, $re) = map { "\0$_$v\0" } '', map { ($_, '('.$_.'?)') } 'c';
471
472							(
473							$k => $s,
474							$k.'_code' => $code,
475							$k.'_re' => qr/$re/,
476							)
477							}
478							qw( amp:& lt:< );
479
480							# Make the values of this private var available to the tests.
481	1			1		77	sub __escape_sequences { %_escape }
482
483
484							# HTML-entity encode any characters configured by the user.
485							# If that doesn't include [&<] then we escape those chars so we can decide
486							# later if we will entity-encode them or put them back verbatim.
487							sub _encode_or_escape_entities {
488	459			459		516	my $self = $_[0];
489	459					604	my $stash = $self->_private;
490	459					656	local $_ = $_[1];
491
492	459	100				738	if( $stash->{encode_amp} ){
		50
493	439	100				651	if( exists($stash->{html_encode_chars}) ){
494							# Escape all amps for later processing.
495							# Pass intermediate strings to entity encoder so that it doesn't
496							# process any of the characters of our escape sequences.
497							# Use -1 to get "as many fields as possible" so that we keep leading and
498							# trailing (possibly empty) fields.
499	38					126	$_ = join $_escape{amp}, map { $self->encode_entities($_) } split /&/, $_, -1;
	57					677
500							}
501							else {
502	401					699	s/&/$_escape{amp}/g;
503							}
504							}
505							elsif( exists($stash->{html_encode_chars}) ){
506	20					35	$_ = $self->encode_entities($_);
507							}
508
509							s/
510	459	100				2315	if $stash->{encode_lt};
511
512	459					770	return $_;
513							}
514
515							# From Markdown.pl version 1.0.1 line 1172 (_DoAutoLinks).
516							my $EMAIL_MARKER = qr{
517							# < # Opening token is in parent regexp.
518							(?:mailto:)?
519							(
520							[-.\w]+
521							\@
522							[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
523							)
524							>
525							}x;
526
527							# Process any escapes we put in the text earlier,
528							# now that the text is complete (end of a block).
529							sub _process_escapes {
530	493			493		588	my $self = $_[0];
531	493					628	my $stash = $self->_private;
532	493					726	local $_ = $_[1];
533
534							# The patterns below are taken from Markdown.pl 1.0.1 _EncodeAmpsAndAngles().
535							# In this case we only want to encode the ones that Markdown won't.
536							# This is overkill but produces nicer looking text (less escaped entities).
537							# If it proves insufficent then we'll just encode them all.
538
539							# $1: If the escape was in a code sequence, simply replace the original.
540							# $2: If the unescaped value would be followed by characters
541							# that could be interpreted as html, entity-encode it.
542							# else: The character is safe to leave bare.
543
544							# Neither currently allows $2 to contain '0' so bool tests are sufficient.
545
546	493	100				784	if( $stash->{encode_amp} ){
547							# Encode & if succeeded by chars that look like an html entity.
548	460					1324	s,$_escape{amp_re}((?:#?[xX]?(?:[0-9a-fA-F]+\|\w+);)?),
549	90	100				400	$1 ? '&'.$2 : $2 ? '&'.$2 : '&',egos;
		100
550							}
551
552	493	100				845	if( $stash->{encode_lt} ){
553							# Encode < if succeeded by chars that look like an html tag.
554							# Leave email addresses () for Markdown to process.
555	460					1827	s,$_escape{lt_re}((?=$EMAIL_MARKER)\|(?:[a-z/?\$!])?),
556	72	100				332	$1 ? '<'.$2 : $2 ? '<'.$2 : '<',egos;
		100
557							}
558
559	493					851	return $_;
560							}
561
562
563							## Parsing ##
564
565							sub handle_text {
566	542			542	0	5467	my $self = $_[0];
567	542					744	my $stash = $self->_private;
568	542					889	local $_ = $_[1];
569
570							# Unless we're in a code span, verbatim block, or formatted region.
571	542	100				908	unless( $stash->{no_escape} ){
572
573							# We could, in theory, alter what gets escaped according to context
574							# (for example, escape square brackets (but not parens) inside link text).
575							# The markdown produced might look slightly nicer but either way you're
576							# at the whim of the markdown processor to interpret things correctly.
577							# For now just escape everything.
578
579							# Don't let literal characters be interpreted as markdown.
580	459					746	$_ = $self->_escape_inline_markdown($_);
581
582							# Entity-encode (or escape for later processing) necessary/desired chars.
583	459					850	$_ = $self->_encode_or_escape_entities($_);
584
585							}
586							# If this _is_ a code section, do limited/specific handling.
587							else {
588							# Always escaping these chars ensures that we won't mangle the text
589							# in the unlikely event that a sequence matching our escape occurred in the
590							# input stream (since we're going to escape it and then unescape it).
591	83	100				327	s/&/$_escape{amp_code}/gos if $stash->{encode_amp};
592	83	100				254	s/{encode_lt};
593							}
594
595	542					911	$self->_save($_);
596							}
597
598							sub start_Document {
599	289			289	0	86474	my ($self) = @_;
600	289					580	$self->_new_stack;
601							}
602
603							sub end_Document {
604	289			289	0	18596	my ($self) = @_;
605	289					616	$self->_check_search_header;
606	289					358	my $end = pop @{ $self->_private->{stacks} };
	289					389
607
608	289	50				341	@{ $self->_private->{stacks} } == 0
	289					384
609							or die 'Document ended with stacks remaining';
610
611	289					776	my @doc = $self->_chomp_all(join('', @$end)) . $/;
612
613	289	100				713	if( $self->include_meta_tags ){
614	6					38	unshift @doc, $self->_build_markdown_head, ($/ x 2);
615							}
616
617	289	100				1669	if( my $encoding = $self->_get_output_encoding ){
618							# Do the check outside the loop(s) for efficiency.
619	55	100				396	my $ents = $HAS_HTML_ENTITIES ? \&__entity_encode_ord_he : \&__entity_encode_ord_basic;
620							# Iterate indices to avoid copying large strings.
621	55					130	for my $i ( 0 .. $#doc ){
622	55					61	print { $self->{output_fh} } Encode::encode($encoding, $doc[$i], $ents);
	55					151
623							}
624							}
625							else {
626	234					1048	print { $self->{output_fh} } @doc;
	234					758
627							}
628							}
629
630							sub _get_output_encoding {
631	289			289		406	my ($self) = @_;
632
633							# If 'match_encoding' is set we need to return an encoding.
634							# If pod has no =encoding, Pod::Simple will guess if it sees a high-bit char.
635							# If there are no high-bit chars, encoding is undef.
636							# Use detected_encoding() rather than encoding() because if Pod::Simple
637							# can't use whatever encoding was specified, we probably can't either.
638							# Fallback to 'o_e' if no match is found. This gives the user the choice,
639							# since otherwise there would be no reason to specify 'o_e' and 'm_e'.
640							# Fallback to UTF-8 since it is a reasonable default these days.
641
642	289	100	100			498	return $self->detected_encoding \|\| $self->output_encoding \|\| 'UTF-8'
643							if $self->match_encoding;
644
645							# If output encoding wasn't specified, return false.
646	277					1357	return $self->output_encoding;
647							}
648
649							## Blocks ##
650
651							sub start_Verbatim {
652	17			17	0	4536	my ($self) = @_;
653	17					47	$self->_new_stack;
654	17					33	$self->_private->{no_escape} = 1;
655							}
656
657							sub end_Verbatim {
658	17			17	0	157	my ($self) = @_;
659
660	17					34	my $text = $self->_pop_stack_text;
661
662	17					41	$text = $self->_indent_verbatim($text);
663
664	17					60	$self->_private->{no_escape} = 0;
665
666							# Verbatim blocks do not generate a separate "Para" event.
667	17					35	$self->_save_block($text);
668							}
669
670							sub _indent_verbatim {
671	17			17		35	my ($self, $paragraph) = @_;
672
673							# NOTE: Pod::Simple expands the tabs for us (as suggested by perlpodspec).
674							# Pod::Simple also has a 'strip_verbatim_indent' attribute
675							# but it doesn't sound like it gains us anything over this method.
676
677							# POD verbatim can start with any number of spaces (or tabs)
678							# markdown should be 4 spaces (or a tab)
679							# so indent any paragraphs so that all lines start with at least 4 spaces
680	17					64	my @lines = split /\n/, $paragraph;
681	17					53	my $indent = ' ' x 4;
682	17					36	foreach my $line ( @lines ){
683	32	100				114	next unless $line =~ m/^( +)/;
684							# find the smallest indentation
685	31	100				104	$indent = $1 if length($1) < length($indent);
686							}
687	17	100				40	if( (my $smallest = length($indent)) < 4 ){
688							# invert to get what needs to be prepended
689	11					27	$indent = ' ' x (4 - $smallest);
690
691							# Prepend indent to each line.
692							# We could check /\S/ to only indent non-blank lines,
693							# but it's backward compatible to respect the whitespace.
694							# Additionally, both pod and markdown say they ignore blank lines
695							# so it shouldn't hurt to leave them in.
696	11	100				24	$paragraph = join "\n", map { length($_) ? $indent . $_ : '' } @lines;
	23					89
697							}
698
699	17					42	return $paragraph;
700							}
701
702							sub start_Para {
703	341			341	0	120645	$_[0]->_new_stack;
704							}
705
706							sub end_Para {
707	341			341	0	2999	my ($self) = @_;
708	341					523	my $text = $self->_pop_stack_text;
709
710	341					662	$text = $self->_escape_paragraph_markdown($text);
711
712	341					553	$self->_save_block($text);
713							}
714
715
716							## Headings ##
717
718	40			40	0	5247	sub start_head1 { $_[0]->_start_head(1) }
719	40			40	0	372	sub end_head1 { $_[0]->_end_head(1) }
720	11			11	0	2744	sub start_head2 { $_[0]->_start_head(2) }
721	11			11	0	122	sub end_head2 { $_[0]->_end_head(2) }
722	1			1	0	336	sub start_head3 { $_[0]->_start_head(3) }
723	1			1	0	11	sub end_head3 { $_[0]->_end_head(3) }
724	0			0	0	0	sub start_head4 { $_[0]->_start_head(4) }
725	0			0	0	0	sub end_head4 { $_[0]->_end_head(4) }
726
727							sub _check_search_header {
728	341			341		473	my ($self) = @_;
729							# Save the text since the last heading if we want it for metadata.
730	341	100				517	if( my $last = $self->_private->{search_header} ){
731	18					32	for( $self->_private->{$last} = $self->_last_string ){
732	18					40	s/\A\s+//;
733	18					84	s/\s+\z//;
734							}
735							}
736							}
737							sub _start_head {
738	52			52		95	my ($self) = @_;
739	52					116	$self->_check_search_header;
740	52					98	$self->_new_stack;
741							}
742
743							sub _end_head {
744	52			52		82	my ($self, $num) = @_;
745	52					94	my $h = '#' x $num;
746
747	52					98	my $text = $self->_pop_stack_text;
748							$self->_private->{search_header} =
749	52	100				194	$text =~ /NAME/ ? 'Title'
		100
750							: $text =~ /AUTHOR/ ? 'Author'
751							: undef;
752
753							# TODO: option for $h suffix
754							# TODO: put a name="" if $self->{embed_anchor_tags}; ?
755							# https://rt.cpan.org/Ticket/Display.html?id=57776
756	52					155	$self->_save_block(join(' ', $h, $text));
757							}
758
759							## Lists ##
760
761							# With Pod::Simple->parse_empty_lists(1) there could be an over_empty event,
762							# but what would you do with that?
763
764							sub _start_list {
765	22			22		37	my ($self) = @_;
766	22					55	$self->_new_stack;
767
768							# Nest again b/c start_item will pop this to look for preceding content.
769	22					49	$self->_increase_indent;
770	22					39	$self->_new_stack;
771							}
772
773							sub _end_list {
774	22			22		35	my ($self) = @_;
775	22					45	$self->_handle_between_item_content;
776
777							# Finish the list.
778
779							# All the child elements should be blocks,
780							# but don't end with a double newline.
781	22					33	my $text = $self->_chomp_all($self->_pop_stack_text);
782
783	22					76	$_[0]->_save_line($text . $/);
784							}
785
786							sub _handle_between_item_content {
787	65			65		82	my ($self) = @_;
788
789							# This might be empty (if the list item had no additional content).
790	65	100				114	if( my $text = $self->_pop_stack_text ){
791							# Else it's a sub-document.
792							# If there are blocks we need to separate with blank lines.
793	21	100				43	if( $self->_private->{last_state}->{blocks} ){
794	16					39	$text = $/ . $text;
795							}
796							# If not, we can condense the text.
797							# In this module's history there was a patch contributed to specifically
798							# produce "huddled" lists so we'll try to maintain that functionality.
799							else {
800	5					9	$text = $self->_chomp_all($text) . $/;
801							}
802	21					47	$self->_save($text)
803							}
804
805	65					119	$self->_decrease_indent;
806							}
807
808							sub _start_item {
809	43			43		65	my ($self) = @_;
810	43					85	$self->_handle_between_item_content;
811	43					81	$self->_new_stack;
812							}
813
814							sub _end_item {
815	43			43		66	my ($self, $marker) = @_;
816	43					61	my $text = $self->_pop_stack_text;
817	43	100	66			215	$self->_save_line($self->_indent($marker .
818							# Add a space only if there is text after the marker.
819							(defined($text) && length($text) ? ' ' . $text : '')
820							));
821
822							# Store any possible contents in a new stack (like a sub-document).
823	43					92	$self->_increase_indent;
824	43					54	$self->_new_stack;
825							}
826
827	7			7	0	1299	sub start_over_bullet { $_[0]->_start_list }
828	7			7	0	527	sub end_over_bullet { $_[0]->_end_list }
829
830	18			18	0	4712	sub start_item_bullet { $_[0]->_start_item }
831	18			18	0	140	sub end_item_bullet { $_[0]->_end_item('-') }
832
833	9			9	0	1958	sub start_over_number { $_[0]->_start_list }
834	9			9	0	991	sub end_over_number { $_[0]->_end_list }
835
836							sub start_item_number {
837	18			18	0	4638	$_[0]->_start_item;
838							# It seems like this should be a stack,
839							# but from testing it appears that the corresponding 'end' event
840							# comes right after the text (it doesn't surround any embedded content).
841							# See t/nested.t which shows start-item, text, end-item, para, start-item....
842	18					35	$_[0]->_private->{item_number} = $_[1]->{number};
843							}
844
845							sub end_item_number {
846	18			18	0	148	my ($self) = @_;
847	18					27	$self->_end_item($self->_private->{item_number} . '.');
848							}
849
850							# Markdown doesn't support definition lists
851							# so do regular (unordered) lists with indented paragraphs.
852	6			6	0	1357	sub start_over_text { $_[0]->_start_list }
853	6			6	0	574	sub end_over_text { $_[0]->_end_list }
854
855	7			7	0	1761	sub start_item_text { $_[0]->_start_item }
856	7			7	0	61	sub end_item_text { $_[0]->_end_item('-')}
857
858
859							# perlpodspec equates an over/back region with no items to a blockquote.
860							sub start_over_block {
861							# NOTE: We don't actually need to indent for a blockquote.
862	3			3	0	719	$_[0]->_new_stack;
863							}
864
865							sub end_over_block {
866	3			3	0	406	my ($self) = @_;
867
868							# Chomp first to avoid prefixing a blank line with a `>`.
869	3					7	my $text = $self->_chomp_all($self->_pop_stack_text);
870
871							# NOTE: Paragraphs will already be escaped.
872
873							# I don't really like either of these implementations
874							# but the join/map/split seems a little better and benches a little faster.
875							# You would lose the last newline but we've already chomped.
876							#$text =~ s{^(.)?}{'>' . (defined($1) && length($1) ? (' ' . $1) : '')}mge;
877	3	100				38	$text = join $/, map { length($_) ? '> ' . $_ : '>' } split qr-$/-, $text;
	22					48
878
879	3					12	$self->_save_block($text);
880							}
881
882							## Custom Formats ##
883
884							sub start_for {
885	10			10	0	2354	my ($self, $attr) = @_;
886	10					26	$self->_new_stack;
887
888	10	100				30	if( $attr->{target} eq 'html' ){
889							# Use another stack so we can indent
890							# (not syntactily necessary but seems appropriate).
891	2					7	$self->_new_stack;
892	2					10	$self->_increase_indent;
893	2					46	$self->_private->{no_escape} = 1;
894							# Mark this so we know to undo it.
895	2					6	$self->_stack_state->{for_html} = 1;
896							}
897							}
898
899							sub end_for {
900	10			10	0	589	my ($self) = @_;
901							# Data gets saved as a block (which will handle indents),
902							# but if there was html we'll alter this, so chomp and save a block again.
903	10					14	my $text = $self->_chomp_all($self->_pop_stack_text);
904
905	10	100				14	if( $self->_private->{last_state}->{for_html} ){
906	2					5	$self->_private->{no_escape} = 0;
907							# Save it to the next stack up so we can pop it again (we made two stacks).
908	2					6	$self->_save($text);
909	2					6	$self->_decrease_indent;
910	2					4	$text = join "\n", ' ', $self->_chomp_all($self->_pop_stack_text), ' ';
911							}
912
913	10					18	$self->_save_block($text);
914							}
915
916							# Data events will be emitted for any formatted regions that have been enabled
917							# (by default, `markdown` and `html`).
918
919							sub start_Data {
920	5			5	0	396	my ($self) = @_;
921							# TODO: limit this to what's in attr?
922	5					11	$self->_private->{no_escape}++;
923	5					11	$self->_new_stack;
924							}
925
926							sub end_Data {
927	5			5	0	46	my ($self) = @_;
928	5					10	my $text = $self->_pop_stack_text;
929	5					8	$self->_private->{no_escape}--;
930	5					9	$self->_save_block($text);
931							}
932
933							## Codes ##
934
935	94			94	0	735	sub start_B { $_[0]->_save('**') }
936	47			47	0	425	sub end_B { $_[0]->start_B() }
937
938	36			36	0	281	sub start_I { $_[0]->_save('_') }
939	18			18	0	159	sub end_I { $_[0]->start_I() }
940
941							sub start_C {
942	61			61	0	864	my ($self) = @_;
943	61					133	$self->_new_stack;
944	61					98	$self->_private->{no_escape}++;
945							}
946
947							sub end_C {
948	61			61	0	481	my ($self) = @_;
949	61					101	$self->_private->{no_escape}--;
950	61					149	$self->_save( $self->_wrap_code_span($self->_pop_stack_text) );
951							}
952
953							# Use code spans for F<>.
954	4			4	0	66	sub start_F { shift->start_C(@_); }
955	4			4	0	41	sub end_F { shift ->end_C(@_); }
956
957							sub start_L {
958	91			91	0	1236	my ($self, $flags) = @_;
959	91					164	$self->_new_stack;
960	91					122	push @{ $self->_private->{link} }, $flags;
	91					134
961							}
962
963							sub end_L {
964	91			91	0	710	my ($self) = @_;
965	91	50				96	my $flags = pop @{ $self->_private->{link} }
	91					127
966							or die 'Invalid state: link end with no link start';
967
968	91					121	my ($type, $to, $section) = @{$flags}{qw( type to section )};
	91					199
969
970	91	50				271	my $url = (
		100
		100
971							$type eq 'url' ? $to
972							: $type eq 'man' ? $self->format_man_url($to, $section)
973							: $type eq 'pod' ? $self->format_perldoc_url($to, $section)
974							: undef
975							);
976
977	91					169	my $text = $self->_pop_stack_text;
978
979							# NOTE: I don't think the perlpodspec says what to do with L<\|blah>
980							# but it seems like a blank link text just doesn't make sense
981	91	100				182	if( !length($text) ){
982	4	100				9	$text =
		100
983							$section ?
984							$to ? sprintf('"%s" in %s', $section, $to)
985							: ('"' . $section . '"')
986							: $to;
987							}
988
989							# FIXME: What does Pod::Simple::X?HTML do for this?
990							# if we don't know how to handle the url just print the pod back out
991	91	50				245	if (!$url) {
992	0					0	$self->_save(sprintf 'L<%s>', $flags->{raw});
993	0					0	return;
994							}
995
996							# In the url we need to escape quotes and parentheses lest markdown
997							# break the url (cut it short and/or wrongfully interpret a title).
998
999							# Backslash escapes do not work for the space and quotes.
1000							# URL-encoding the space is not sufficient
1001							# (the quotes confuse some parsers and produce invalid html).
1002							# I've arbitratily chosen HTML encoding to hide them from markdown
1003							# while mangling the url as litle as possible.
1004	91					553	$url =~ s/([ '"])/sprintf '&#x%x;', ord($1)/ge;
	11					55
1005
1006							# We also need to double any backslashes that may be present
1007							# (lest they be swallowed up) and stop parens from breaking the url.
1008	91					326	$url =~ s/([\\()])/\\$1/g;
1009
1010							# TODO: put section name in title if not the same as $text
1011	91					455	$self->_save('[' . $text . '](' . $url . ')');
1012							}
1013
1014							sub start_X {
1015	1			1	0	16	$_[0]->_new_stack;
1016							}
1017
1018							sub end_X {
1019	1			1	0	10	my ($self) = @_;
1020	1					3	my $text = $self->_pop_stack_text;
1021							# TODO: mangle $text?
1022							# TODO: put if configured
1023							}
1024
1025							# A code span can be delimited by multiple backticks (and a space)
1026							# similar to pod codes (C<< code >>), so ensure we use a big enough
1027							# delimiter to not have it broken by embedded backticks.
1028							sub _wrap_code_span {
1029	61			61		113	my ($self, $arg) = @_;
1030	61					105	my $longest = 0;
1031	61					191	while( $arg =~ /([`]+)/g ){
1032	5					12	my $len = length($1);
1033	5	100				21	$longest = $len if $longest < $len;
1034							}
1035	61					127	my $delim = '`' x ($longest + 1);
1036	61	100				131	my $pad = $longest > 0 ? ' ' : '';
1037	61					215	return $delim . $pad . $arg . $pad . $delim;
1038							}
1039
1040							## Link Formatting (TODO: Move this to another module) ##
1041
1042
1043							sub format_man_url {
1044	6			6	1	8	my ($self, $to) = @_;
1045	6					13	my ($page, $part) = ($to =~ /^ ([^(]+) (?: $ (\S+) $ )? /x);
1046	6		50			99	return $self->man_url_prefix . ($part \|\| 1) . '/' . ($page \|\| $to);
			33
1047							}
1048
1049
1050							sub format_perldoc_url {
1051	68			68	1	141	my ($self, $name, $section) = @_;
1052
1053	68		66			187	my $url_prefix = defined($name)
1054							&& $self->is_local_module($name)
1055							&& $self->local_module_url_prefix
1056							\|\| $self->perldoc_url_prefix;
1057
1058	68					94	my $url = '';
1059
1060							# If the link is to another module (external link).
1061	68	100				125	if ($name) {
1062	57					1155	$url = $url_prefix . $name;
1063							}
1064
1065							# See https://rt.cpan.org/Ticket/Display.html?id=57776
1066							# for a discussion on the need to mangle the section.
1067	68	100				641	if ($section){
1068
1069	42	100				987	my $method = $url
1070							# If we already have a prefix on the url it's external.
1071							? $self->perldoc_fragment_format
1072							# Else an internal link points to this markdown doc.
1073							: $self->markdown_fragment_format;
1074
1075	42	100				250	$method = 'format_fragment_' . $method
1076							unless ref($method);
1077
1078							{
1079							# Set topic to enable code refs to be simple.
1080	42					50	local $_ = $section;
	42					54
1081	42					89	$section = $self->$method($section);
1082							}
1083
1084	42					110	$url .= '#' . $section;
1085							}
1086
1087	68					369	return $url;
1088							}
1089
1090
1091							# TODO: simple, pandoc, etc?
1092
1093							sub format_fragment_markdown {
1094	3			3	1	7	my ($self, $section) = @_;
1095
1096							# If this is an internal link (to another section in this doc)
1097							# we can't be sure what the heading id's will look like
1098							# (it depends on what is rendering the markdown to html)
1099							# but we can try to follow popular conventions.
1100
1101							# http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html#header-identifiers-in-html-latex-and-context
1102							#$section =~ s/(?![-_.])[[:punct:]]//g;
1103							#$section =~ s/\s+/-/g;
1104	3					6	$section =~ s/\W+/-/g;
1105	3					104	$section =~ s/-+$//;
1106	3					5	$section =~ s/^-+//;
1107	3					9	$section = lc $section;
1108							#$section =~ s/^[^a-z]+//;
1109	3		50			7	$section \|\|= 'section';
1110
1111	3					5	return $section;
1112							}
1113
1114
1115							{
1116							# From Pod::Simple::XHTML 3.28.
1117							# The strings gets passed through encode_entities() before idify().
1118							# If we don't do it here the substitutions below won't operate consistently.
1119
1120							sub format_fragment_pod_simple_xhtml {
1121	9			9	1	11	my ($self, $t) = @_;
1122
1123							# encode_entities {
1124							# We need to use the defaults in case html_encode_chars has been customized
1125							# (since the purpose is to match what external sources are doing).
1126
1127	9					18	local $self->_private->{html_encode_chars};
1128	9					21	$t = $self->encode_entities($t);
1129							# }
1130
1131							# idify {
1132	9					286	for ($t) {
1133	9					15	s/<[^>]+>//g; # Strip HTML.
1134	9					83	s/&[^;]+;//g; # Strip entities.
1135	9					73	s/^\s+//; s/\s+$//; # Strip white space.
	9					91
1136	9					79	s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
1137	9					95	s/^[^a-zA-Z]+//; # First char must be a letter.
1138	9					72	s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
1139	9					118	s/[-:.]+$//; # Strip trailing punctuation.
1140							}
1141							# }
1142
1143	9					43	return $t;
1144							}
1145							}
1146
1147
1148							sub format_fragment_pod_simple_html {
1149	9			9	1	13	my ($self, $section) = @_;
1150
1151							# From Pod::Simple::HTML 3.28.
1152
1153							# section_name_tidy {
1154	9					16	$section =~ s/^\s+//;
1155	9					128	$section =~ s/\s+$//;
1156	9					130	$section =~ tr/ /_/;
1157	9					220	$section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
1158
1159							#$section = $self->unicode_escape_url($section);
1160							# unicode_escape_url {
1161	9					18	$section =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
	0					0
1162							# Turn char 1234 into "(1234)"
1163							# }
1164
1165	9	50				19	$section = '_' unless length $section;
1166	9					17	return $section;
1167							# }
1168							}
1169
1170
1171	9			9	1	19	sub format_fragment_metacpan { shift->format_fragment_pod_simple_xhtml(@_); }
1172	9			9	1	19	sub format_fragment_sco { shift->format_fragment_pod_simple_html(@_); }
1173
1174
1175							sub is_local_module {
1176	57			57	1	82	my ($self, $name) = @_;
1177
1178	57					113	return ($name =~ $self->local_module_re);
1179							}
1180
1181							1;
1182
1183							__END__