File Coverage

blib/lib/Pod/Simple/Pandoc.pm

Criterion	Covered	Total	%
statement	152	208	73.0
branch	45	82	54.8
condition	4	24	16.6
subroutine	28	34	82.3
pod	6	9	66.6
total	235	357	65.8

line	stmt	bran	cond	sub	pod	time	code
1							package Pod::Simple::Pandoc;
2	5			5		165964	use strict;
	5					7
	5					127
3	5			5		16	use warnings;
	5					5
	5					110
4	5			5		83	use 5.010;
	5					12
5
6							our $VERSION = '0.346.0';
7
8	5			5		2209	use Pod::Simple::SimpleTree;
	5					60983
	5					112
9	5			5		1613	use Pandoc::Elements;
	5					193392
	5					1010
10	5			5		2403	use Pandoc::Filter::HeaderIdentifiers;
	5					13405
	5					268
11	5			5		1796	use Pod::Pandoc::Modules;
	5					7
	5					152
12	5			5		27	use Pandoc;
	5					4
	5					29
13	5			5		344	use File::Find ();
	5					4
	5					56
14	5			5		16	use File::Spec;
	5					5
	5					69
15	5			5		14	use IPC::Run3;
	5					4
	5					167
16	5			5		15	use Carp;
	5					6
	5					159
17	5			5		15	use utf8;
	5					6
	5					29
18
19							sub new {
20	2			2	0	20	my ( $class, %opt ) = @_;
21
22	2		50			14	$opt{targets} //= [];
23	2	50				5	if ( $opt{'data-sections'} ) {
24	0					0	Pandoc->require('1.12');
25	0					0	push @{ $opt{targets} }, pandoc->input_formats;
	0					0
26							}
27
28	2		50			13	$opt{podurl} //= 'https://metacpan.org/pod/';
29
30	2					8	bless \%opt, $class;
31							}
32
33							sub _parser {
34	4			4		6	my $self = shift;
35
36	4					44	my $parser = Pod::Simple::SimpleTree->new;
37	4					103	$parser->nix_X_codes(1); # ignore X<...> codes
38	4					51	$parser->nbsp_for_S(1); # map S<...> to U+00A0 (non-breaking space)
39	4					26	$parser->merge_text(1); # emit text nodes combined
40	4					24	$parser->no_errata_section(1); # omit errata section
41	4					25	$parser->complain_stderr(1); # TODO: configure
42	4					25	$parser->accept_targets('*'); # include all data sections
43
44							# remove shortest leading whitespace string from verbatim sections
45							$parser->strip_verbatim_indent(
46							sub {
47	6			6		64738	my $indent = length $_[0][1];
48	6					7	for ( @{ $_[0] } ) {
	6					12
49	30					34	$_ =~ /^(\s*)/;
50	30	100				57	$indent = length($1) if length($1) < $indent;
51							}
52	6					16	' ' x $indent;
53							}
54	4					73	);
55
56	4					25	return $parser;
57							}
58
59							sub parse_file {
60	4			4	1	154274	my ( $self, $file ) = @_;
61
62							# Pod::Simple::parse_file does not detect this
63	4	50				90	croak "Can't use directory as a source for parse_file" if -d $file;
64
65	4					13	my $doc = $self->parse_tree( $self->_parser->parse_file($file)->root );
66
67	2	50	33			397	if ( !ref $file and $file ne '-' ) {
68	2					7	$doc->meta->{file} = MetaString($file);
69							}
70
71	2					33	$doc;
72							}
73
74							sub parse_module {
75	1			1	1	1044	my ( $self, $name ) = @_;
76
77							# map module name to name
78	1					7	run3 [ 'perldoc', '-lm', $name ], undef, \$name;
79	1	50				40391	croak $? if $?;
80	0					0	chomp $name;
81
82	0					0	$self->parse_file($name);
83							}
84
85							sub parse_string {
86	0			0	1	0	my ( $self, $string ) = @_;
87	0					0	$self->parse_tree( $self->_parser->parse_string_document($string)->root );
88							}
89
90							sub parse_tree {
91	2			2	0	8757	my $doc = Pandoc::Filter::HeaderIdentifiers->new->apply( _pod_element(@_) );
92
93	2					420003	my $sections = $doc->outline(1)->{sections};
94	2	50				1934	if ( my ($name) = grep { $_->{header}->string eq 'NAME' } @$sections ) {
	14					2813
95
96							# TODO: support formatting
97	2					836	my $text = $name->{blocks}->[0]->string;
98	2					2953	my ( $title, $subtitle ) = $text =~ m{^\s([^ ]+)\s[:-]\s(.+)};
99	2	50				23	$doc->meta->{title} = MetaString($title) if $title;
100	2	50				75	$doc->meta->{subtitle} = MetaString($subtitle) if $subtitle;
101							}
102
103	2					29	$doc;
104							}
105
106							sub parse_and_merge {
107	0			0	1	0	my ( $self, @input ) = @_;
108
109	0					0	my $doc;
110
111	0					0	foreach my $file (@input) {
112
113	0	0	0			0	my $cur =
114							( $file ne '-' and not -e $file )
115							? $self->parse_module($file)
116							: $self->parse_file($file);
117
118	0	0				0	if ($doc) {
119	0					0	push @{ $doc->content }, @{ $cur->content };
	0					0
	0					0
120							}
121							else {
122	0					0	$doc = $cur;
123							}
124							}
125
126	0	0				0	return unless $doc;
127
128	0					0	$doc->meta->{file} = MetaList [ map { MetaString $_ } @input ];
	0					0
129
130	0					0	return $doc;
131							}
132
133							sub is_perl_file {
134	0			0	0	0	my $file = shift;
135	0	0				0	return 1 if $file =~ /\.(pm\|pod)$/;
136	0	0				0	if ( -x $file ) {
137	0	0				0	open( my $fh, '<', $file ) or return;
138	0	0	0			0	return 1 if $fh and ( <$fh> // '' ) =~ /^#!.*perl/;
			0
139							}
140	0					0	0;
141							}
142
143							sub parse_dir {
144	0			0	1	0	my ( $parser, $directory ) = @_;
145	0					0	my $files = {};
146
147							File::Find::find(
148							{
149							no_chdir => 1,
150							wanted => sub {
151	0			0		0	my $file = $_;
152	0	0				0	return unless is_perl_file($file);
153	0					0	my $doc = $parser->parse_file($file);
154	0					0	my $base = File::Spec->abs2rel( $directory, $file );
155	0					0	$base =~ s/\.\.$//;
156	0					0	$doc->meta->{base} = MetaString $base;
157	0					0	$files->{$file} = $doc;
158							}
159							},
160	0					0	$directory
161							);
162
163	0					0	$files;
164							}
165
166							sub parse_modules {
167	0			0	1	0	my ( $parser, $dir, %opt ) = @_;
168	0	0				0	return unless -d $dir;
169
170	0					0	my $modules = Pod::Pandoc::Modules->new;
171
172	0					0	foreach my $doc ( values %{ $parser->parse_dir($dir) } ) {
	0					0
173	0					0	my $file = $doc->metavalue('file');
174	0					0	my $module = File::Spec->abs2rel( $file, $dir );
175	0					0	$module =~ s{\.(pm\|pod)$}{}g;
176	0					0	$module =~ s{/}{::}g;
177	0	0	0			0	if ( ( $doc->metavalue('title') // $module ) eq $module ) {
178	0	0	0			0	if ( not $modules->add( $module => $doc ) and $opt{quiet} ) {
179							warn "$file skipped for "
180	0					0	. $modules->{$module}->metavalue('file');
181							}
182							}
183							else {
184	0	0				0	warn "$file NAME does not match module\n" unless $opt{quiet};
185							}
186							}
187
188	0					0	$modules;
189							}
190
191							my %POD_ELEMENT_TYPES = (
192							Document => sub {
193							Document {}, [ _pod_content(@_) ];
194							},
195							Para => sub {
196							Para [ _pod_content(@_) ];
197							},
198							I => sub {
199							Emph [ _pod_content(@_) ];
200							},
201							B => sub {
202							Strong [ _pod_content(@_) ];
203							},
204							L => \&_pod_link,
205							C => sub {
206							Code attributes {}, _pod_flatten(@_);
207							},
208							F => sub {
209							Code attributes { classes => ['filename'] }, _pod_flatten(@_);
210							},
211							head1 => sub {
212							Header 1, attributes {}, [ _pod_content(@_) ];
213							},
214							head2 => sub {
215							Header 2, attributes {}, [ _pod_content(@_) ];
216							},
217							head3 => sub {
218							Header 3, attributes {}, [ _pod_content(@_) ];
219							},
220							head4 => sub {
221							Header 4, attributes {}, [ _pod_content(@_) ];
222							},
223							Verbatim => sub {
224							CodeBlock attributes {}, _pod_flatten(@_);
225							},
226							'over-bullet' => sub {
227							BulletList [ _pod_list(@_) ];
228							},
229							'over-number' => sub {
230							OrderedList [ 1, DefaultStyle, DefaultDelim ], [ _pod_list(@_) ];
231							},
232							'over-text' => sub {
233							DefinitionList [ _pod_list(@_) ];
234							},
235							'over-block' => sub {
236							BlockQuote [ _pod_content(@_) ];
237							},
238							'for' => \&_pod_data,
239							);
240
241							# option --smart
242							sub _str {
243	1284			1284		5664	my $s = shift;
244	1284					976	$s =~ s/\.\.\./…/g;
245	1284					1418	Str $s;
246							}
247
248							# map a single element or text to a list of Pandoc elements
249							sub _pod_element {
250	640			640		576	my ( $self, $element ) = @_;
251
252	640	100				728	if ( ref $element ) {
253	310	50				662	my $type = $POD_ELEMENT_TYPES{ $element->[0] } or return;
254	310					362	$type->( $self, $element );
255							}
256							else {
257	330					199	my $n = 0;
258	330	100				1048	map { $n++ ? ( Space, _str($_) ) : _str($_) }
	1284					6704
259							split( /\s+/, $element, -1 );
260							}
261							}
262
263							# map the content of a Pod element to a list of Pandoc elements
264							sub _pod_content {
265	200			200		15372	my ( $self, $element ) = @_;
266	200					150	my $length = scalar @$element;
267	200					315	map { _pod_element( $self, $_ ) } @$element[ 2 .. ( $length - 1 ) ];
	628					5042
268							}
269
270							# stringify the content of an element
271							sub _pod_flatten {
272	110			110		14761	my $string = '';
273	110					85	my $walk;
274							$walk = sub {
275	114			114		97	my ($element) = @_;
276	114					91	my $n = scalar @$element;
277	114					207	for ( @$element[ 2 .. $n - 1 ] ) {
278	116	100				140	if ( ref $_ ) {
279	4					14	$walk->($_);
280							}
281							else {
282	112					200	$string .= $_;
283							}
284							}
285	110					303	};
286	110					153	$walk->( $_[1] );
287
288	110					217	return $string;
289							}
290
291							# map link
292							sub _pod_link {
293	68			68		53	my ( $self, $link ) = @_;
294	68					88	my $type = $link->[1]{type};
295	68					69	my $to = $link->[1]{to};
296	68					50	my $section = $link->[1]{section};
297	68					56	my $url = '';
298
299	68	100				146	if ( $type eq 'url' ) {
		100
		50
300	6					21	$url = "$to";
301							}
302							elsif ( $type eq 'man' ) {
303	6	50				12	if ( $to =~ /^([^(]+)(?:[(](\d+)[)])?$/ ) {
304
305							# TODO: configure MAN_URL, e.g.
306							# http://man7.org/linux/man-pages/man{section}/{name}.{section}.html
307	6					94	$url = "http://linux.die.net/man/$2/$1";
308
309							# TODO: add section to URL if given
310							}
311							}
312							elsif ( $type eq 'pod' ) {
313	56	100				115	if ($to) {
314	50					788	$url = $self->{podurl} . $to;
315							}
316	56	100				382	if ($section) {
317	10	100				149	$section = header_identifier("$section") unless $to; # internal link
318	10					3839	$url .= "#" . $section;
319							}
320							}
321
322	68					193	return Link attributes {}, [ _pod_content( $self, $link ) ], [ $url, '' ];
323							}
324
325							# map data section
326							sub _pod_data {
327	10			10		9	my ( $self, $element ) = @_;
328	10					20	my $target = lc( $element->[1]{target} );
329
330	10					8	my $length = scalar @$element;
331	12					22	my $content = join "\n\n", map { $_->[2] }
332	10					21	grep { $_->[0] eq 'Data' } @$element[ 2 .. $length - 1 ];
	12					27
333
334							# cleanup HTML and Tex blocks
335	10	100				39	if ( $target eq 'html' ) {
		100
336	4	100				23	$content = " $content " if $content !~ /^<.+>$/s;
337							}
338							elsif ( $target =~ /^(la)?tex$/ ) {
339
340							# TODO: more intelligent check & grouping, especiall at the end
341	4	50				12	$content = "\\begingroup $content \\endgroup" if $content !~ /^[\\{]/;
342	4					4	$target = 'tex';
343							}
344
345							# parse and insert known formats if requested
346	10	100				22	my $format = $target eq 'tex' ? 'latex' : $target;
347	10	50				9	if ( grep { $format eq $_ } @{ $self->{targets} } ) {
	0					0
	10					21
348	0					0	utf8::decode($content);
349	0					0	my $doc = pandoc->parse( $format => $content, '--smart' );
350	0					0	return @{ $doc->content };
	0					0
351							}
352
353	10					27	RawBlock( $target, "$content\n" );
354
355							# TODO: add Null element to not merge with following content
356							}
357
358							# map a list (any kind)
359							sub _pod_list {
360	8			8		67	my ( $self, $element ) = @_;
361	8					8	my $length = scalar @$element;
362
363	8					17	my $deflist = $element->[2][0] eq 'item-text';
364	8					5	my @list;
365	8					9	my $item = [];
366
367							my $push_item = sub {
368	26	100		26		43	return unless @$item;
369	18	100				24	if ($deflist) {
370	10					12	my $term = shift @$item;
371	10					166	push @list, [ $term->content, [$item] ];
372							}
373							else {
374	8					9	push @list, $item;
375							}
376	8					27	};
377
378	8					19	foreach my $e ( @$element[ 2 .. $length - 1 ] ) {
379	28					310	my $type = $e->[0];
380	28	100				72	if ( $type =~ /^item-(number\|bullet\|text)$/ ) {
381	18					22	$push_item->();
382	18					64	$item = [ Plain [ _pod_content( $self, $e ) ] ];
383							}
384							else {
385	10	50	33			42	if ( @$item == 1 and $item->[0]->name eq 'Plain' ) {
386
387							# first block element in item should better be Paragraph
388	10					197	$item->[0] = Para $item->[0]->content;
389							}
390	10					118	push @$item, _pod_element( $self, $e );
391							}
392							}
393	8					122	$push_item->();
394
395							# BulletList/OrderedList: [ @blocks ], ...
396							# DefinitionList: [ [ @inlines ], [ @blocks ] ], ...
397	8					105	return @list;
398							}
399
400							1;
401							__END__