File Coverage

blib/lib/Markdent/Dialect/Theory/BlockParser.pm

Criterion	Covered	Total	%
statement	184	186	98.9
branch	57	66	86.3
condition	15	22	68.1
subroutine	27	27	100.0
pod			n/a
total	283	301	94.0

line	stmt	bran	cond	sub	time	code
1						package Markdent::Dialect::Theory::BlockParser;
2
3	4			4	229082	use strict;
	4				13
	4				117
4	4			4	22	use warnings;
	4				7
	4				98
5	4			4	525	use namespace::autoclean;
	4				18892
	4				25
6
7						our $VERSION = '0.39';
8
9	4			4	976	use List::AllUtils qw( insert_after_string sum );
	4				10961
	4				260
10	4			4	1728	use Markdent::Event::StartTable;
	4				16
	4				158
11	4			4	2997	use Markdent::Event::EndTable;
	4				16
	4				158
12	4			4	2420	use Markdent::Event::StartTableHeader;
	4				16
	4				153
13	4			4	2398	use Markdent::Event::EndTableHeader;
	4				16
	4				167
14	4			4	2417	use Markdent::Event::StartTableBody;
	4				14
	4				164
15	4			4	2289	use Markdent::Event::EndTableBody;
	4				17
	4				168
16	4			4	2359	use Markdent::Event::StartTableRow;
	4				15
	4				154
17	4			4	2314	use Markdent::Event::EndTableRow;
	4				17
	4				162
18	4			4	2341	use Markdent::Event::StartTableCell;
	4				16
	4				166
19	4			4	2066	use Markdent::Event::EndTableCell;
	4				18
	4				183
20	4			4	563	use Markdent::Regexes qw( $HorizontalWS $EmptyLine $BlockStart $BlockEnd );
	4				8
	4				628
21	4			4	28	use Markdent::Types;
	4				11
	4				77
22
23	4			4	96009	use Moose::Role;
	4				8
	4				48
24
25						with 'Markdent::Role::Dialect::BlockParser';
26
27						has _in_table => (
28						traits => ['Bool'],
29						is => 'ro',
30						isa => t('Bool'),
31						default => 0,
32						init_arg => undef,
33						handles => {
34						_enter_table => 'set',
35						_leave_table => 'unset',
36						},
37						);
38
39						around _possible_block_matches => sub {
40						my $orig = shift;
41						my $self = shift;
42
43						my @look_for = $self->$orig();
44
45						return @look_for if $self->_list_level;
46
47						if ( $self->_in_table ) {
48						insert_after_string 'list', 'table_cell', @look_for;
49						}
50						else {
51						insert_after_string 'list', 'table', @look_for;
52						}
53
54						return @look_for;
55						};
56
57						my $TableCaption = qr{ ^
58						$HorizontalWS*
59						\[
60						(.*)
61						\]
62						$HorizontalWS*
63						\n
64						}xm;
65
66						# The use of (?> ... ) in the various regexes below forces the regex engine
67						# not to backtrack once it matches the relevant subsection. Using this where
68						# possible _hugely_ speeds up matching, and seems to be safe. At least, the
69						# tests pass.
70
71						my $PipeRow = qr{ ^
72						[\|]? # optional starting pipe
73						(?:
74						(?:
75						(?>[^\\|\\\n]*) # safe chars (not pipe or escape or newline)
76						\|
77						\\[\|] # an escaped newline
78						)+
79						[\|] # must have at least one pipe
80						)+
81						.* # can have a final cell after the last pipe
82						}xm;
83
84						my $ColonRow = qr{ ^
85						:?
86						(?:
87						(?:
88						(?>[^:\\\n]*)
89						\|
90						\\:
91						)+
92						:
93						)+
94						.*
95						}xm;
96
97						my $TableRow = qr{ (?>$PipeRow) # must have at least one starting row
98						\n
99						(?>
100						(?:
101						$ColonRow
102						\n
103						)* # ... can have 0+ continuation lines
104						)
105						}xm;
106
107						my $HeaderMarkerLine = qr/^[\-\+=]+\n/xm;
108
109						my $TableHeader = qr{ $TableRow
110						$HeaderMarkerLine
111						}xm;
112
113						## no critic (Subroutines::ProhibitUnusedPrivateSubroutines)
114						sub _match_table {
115	29			29	81	my $self = shift;
116	29				61	my $text = shift;
117
118	29	100			51	return unless ${$text} =~ / \G
	29				2571
119						$BlockStart
120						(
121						$TableCaption?
122						$HeaderMarkerLine?
123						($TableHeader+)?
124						(
125						$TableRow
126						(?:
127						$TableRow
128						\|
129						$EmptyLine
130						)*
131						)
132						$HeaderMarkerLine?
133						$TableCaption?
134						)
135						$BlockEnd
136						/xmgc;
137
138	21	50			2371	$self->_debug_parse_result(
139						$1,
140						'table',
141						) if $self->debug;
142
143	21	100			102	my $caption = defined $2 ? $2 : $5;
144
145	21	50	66		257	$self->_debug_parse_result(
146						$caption,
147						'table caption',
148						) if defined $caption && $self->debug;
149
150	21				60	my $header = $3;
151	21				76	my $body = $4;
152
153	21	50			560	$self->_debug_parse_result(
154						$header,
155						'table header',
156						) if $self->debug;
157
158	21	50			556	$self->_debug_parse_result(
159						$body,
160						'table body',
161						) if $self->debug;
162
163	21				46	my @header;
164
165	21	100			62	if ( defined $header ) {
166	17				113	@header = $self->_parse_rows( qr/$HeaderMarkerLine/m, $header );
167	17				68	$_->{is_header_cell} = 1 for map { @{$_} } @header;
	20				37
	20				114
168						}
169
170	21				112	my @body = $self->_parse_rows( qr/\n/, $body );
171
172	21				108	$self->_normalize_cell_count_and_alignments( @header, @body );
173
174	21	100			63	if (@header) {
175	17				44	my $first_header_cell_content = $header[0][0]{content};
176	17	100	66		151	unless ( defined $first_header_cell_content
177						&& $first_header_cell_content =~ /\S/ ) {
178	1				6	$_->[0]{is_header_cell} = 1 for @body;
179						}
180						}
181
182	21				1029	$self->_enter_table;
183
184	21	100			74	my %caption = defined $caption ? ( caption => $caption ) : ();
185	21				125	$self->_send_event( 'StartTable', %caption );
186
187	21	100			641	$self->_events_for_rows( \@header, 'Header' )
188						if @header;
189	21				548	$self->_events_for_rows( \@body, 'Body' );
190
191	21				616	$self->_send_event('EndTable');
192
193	21				592	$self->_leave_table;
194
195	21				531	return 1;
196						}
197						## use critic
198
199						sub _parse_rows {
200	38			38	84	my $self = shift;
201	38				58	my $split_re = shift;
202	38				72	my $rows = shift;
203
204	38				73	my @rows;
205
206	38				324	for my $chunk ( split $split_re, $rows ) {
207
208						# Splitting on an empty string returns nothing, so we need to
209						# special-case that, as we want to preserve empty lines.
210	168	100			603	for my $line ( length $chunk ? ( split /\n/, $chunk ) : $chunk ) {
211	168	100			846	if ( $line =~ /^$HorizontalWS*$/ ) {
		100
212	2				7	push @rows, undef;
213						}
214						elsif ( $self->_is_continuation_line($line) ) {
215
216						# If the $TableRow regex is correct, this shouldn't be
217						# possible.
218	7	50			23	die q{Continuation of a row before we've seen a row start?!}
219						unless @rows;
220
221	7				19	my $cells = $self->_cells_from_line( $line, ':' );
222
223	7				13	for my $i ( 0 .. $#{$cells} ) {
	7				21
224	15	100	66		79	if ( defined $cells->[$i]{content}
225						&& $cells->[$i]{content} =~ /\S/ ) {
226						$rows[-1][$i]{content}
227	9				29	.= "\n" . $cells->[$i]{content};
228	9		100		37	$rows[-1][$i]{colspan} \|\|= 1;
229						}
230						}
231						}
232						else {
233	159				338	push @rows, $self->_cells_from_line( $line, '\|' );
234						}
235						}
236						}
237
238	38				131	return @rows;
239						}
240
241						sub _is_continuation_line {
242	166			166	584	my $self = shift;
243	166				219	my $line = shift;
244
245	166	100			623	return 0
246						if $line =~ /(?<!\\)[\|]/x;
247
248	7	50			39	return 1
249						if $line =~ /(^\|\p{SpaceSeparator}+)(?<!\\):(\p{SpaceSeparator}\|$)/x;
250
251						# a blank line, presumably
252	0				0	return 0;
253						}
254
255						sub _cells_from_line {
256	166			166	227	my $self = shift;
257	166				222	my $line = shift;
258	166				208	my $div = shift;
259
260	166				195	my @row;
261
262	166				322	for my $cell ( $self->_split_cells( $line, $div ) ) {
263	657	100			1258	if ( length $cell ) {
		50
264	642				1069	push @row, $self->_cell_params($cell);
265						}
266
267						# If the first cell is empty, that means the line started with a
268						# divider, and we can ignore the "cell". If we already have cells in
269						# the row, that means we just saw a repeated divider, which means the
270						# most recent cell has a colspan+1.
271						elsif (@row) {
272	15				34	$row[-1]{colspan}++;
273						}
274						}
275
276	166				489	return \@row;
277						}
278
279						sub _split_cells {
280	166			166	217	my $self = shift;
281	166				203	my $line = shift;
282	166				246	my $div = shift;
283
284	166				735	$line =~ s/^\Q$div//;
285	166				1246	$line =~ s/\Q$div\E$HorizontalWS*$/$div/;
286
287						# We don't want to split on a backslash-escaped divider, thus the
288						# lookbehind. The -1 ensures that Perl gives us the trailing empty fields.
289	166				2288	my @cells = split /(?<!\\)\Q$div/, $line, -1;
290
291						# If the line has just one divider as the line-ending, it should not be
292						# treated as marking an empty cell.
293	166	100	66		1224	if ( $cells[-1] eq q{} && $line =~ /\Q$div\E$HorizontalWS*$/ ) {
294	156				1215	pop @cells;
295						}
296
297	166				533	return @cells;
298						}
299
300						sub _cell_params {
301	642			642	789	my $self = shift;
302	642				798	my $cell = shift;
303
304	642				767	my $alignment;
305						my $content;
306
307	642	100	66		2132	if ( defined $cell && $cell =~ /\S/ ) {
308	633				1060	$alignment = $self->_alignment_for_cell($cell);
309
310	633				3641	( $content = $cell ) =~ s/^$HorizontalWS+\|$HorizontalWS+$//g;
311						}
312
313	642				2287	my %p = (
314						colspan => 1,
315						content => $content,
316						);
317
318	642	100			1296	$p{alignment} = $alignment
319						if defined $alignment;
320
321	642				1415	return \%p;
322						}
323
324						sub _alignment_for_cell {
325	633			633	753	my $self = shift;
326	633				763	my $cell = shift;
327
328	633	100			1422	return 'center'
329						if $cell =~ /^\p{SpaceSeparator}{2,}.+?\p{SpaceSeparator}{2,}$/;
330
331	632	100			1632	return 'left'
332						if $cell =~ /\p{SpaceSeparator}{2,}$/;
333
334	151	100			319	return 'right'
335						if $cell =~ /^\p{SpaceSeparator}{2,}/;
336
337	148				229	return;
338						}
339
340						sub _normalize_cell_count_and_alignments {
341	21			21	42	my $self = shift;
342	21				54	my @rows = @_;
343
344						# We use the first header row as an indicator for how many cells we expect
345						# on each line.
346	21				46	my $default_cells = sum( map { $_->{colspan} } @{ $rows[0] } );
	48				174
	21				62
347
348						# Alignments are inherited from the cell above, or they default to
349						# "left". We loop through all the rules and set alignments accordingly.
350	21				53	my %alignments;
351
352	21				54	for my $row ( grep {defined} @rows ) {
	161				250
353
354						# If we have one extra column and the final cell has a colspan > 1 it
355						# means we misinterpreted a trailing divider as indicating that the
356						# prior cell had a colspan > 1. We adjust for that by comparing it to
357						# the number of columns in the first row.
358	159	50	33		198	if ( sum( map { $_->{colspan} } @{$row} ) == $default_cells + 1
	628				1027
	159				235
359						&& $row->[-1]{colspan} > 1 ) {
360	0				0	$row->[-1]{colspan}--;
361						}
362
363	159				237	my $i = 0;
364	159				185	for my $cell ( @{$row} ) {
	159				230
365	628	100			884	if ( $cell->{alignment} ) {
366	480				620	$alignments{$i} = $cell->{alignment};
367						}
368						else {
369	148		100		391	$cell->{alignment} = $alignments{$i} \|\| 'left';
370						}
371
372	628				869	$i += $cell->{colspan};
373						}
374						}
375						}
376
377						sub _events_for_rows {
378	38			38	77	my $self = shift;
379	38				67	my $rows = shift;
380	38				67	my $type = shift;
381
382	38				85	my $start = 'StartTable' . $type;
383	38				88	my $end = 'EndTable' . $type;
384
385	38				140	$self->_send_event($start);
386
387	38				1250	for my $row ( @{$rows} ) {
	38				111
388	161	100			436	if ( !defined $row ) {
389	2				11	$self->_send_event($end);
390	2				62	$self->_send_event($start);
391	2				62	next;
392						}
393
394	159				468	$self->_send_event('StartTableRow');
395
396	159				4197	for my $cell ( @{$row} ) {
	159				367
397	628				1460	my $content = delete $cell->{content};
398
399						$self->_send_event(
400						'StartTableCell',
401	628				920	%{$cell}
	628				3032
402						);
403
404	628	100			16483	if ( defined $content ) {
405
406						# If the content has newlines, it should be matched as a
407						# block-level construct (blockquote, list, etc), but to make
408						# that work, it has to have a trailing newline.
409	626	100			1490	$content .= "\n"
410						if $content =~ /\n/;
411
412	626				2023	$self->_parse_text( \$content );
413						}
414
415						$self->_send_event(
416						'EndTableCell',
417						is_header_cell => $cell->{is_header_cell},
418	628				2440	);
419						}
420
421	159				526	$self->_send_event('EndTableRow');
422						}
423
424	38				153	$self->_send_event($end);
425						}
426
427						# A table cell's contents can be a single line _not_ terminated by a
428						# newline. If that's the case, it won't match as a paragraph.
429						#
430						## no critic (Subroutines::ProhibitUnusedPrivateSubroutines)
431						sub _match_table_cell {
432	625			625	941	my $self = shift;
433	625				907	my $text = shift;
434
435	625	100			758	return unless ${$text} =~ / \G
	625				2834
436						(
437						^
438						\p{SpaceSeparator}*
439						\S
440						.*
441						)
442						\z
443						/xmgc;
444
445	620	50			15659	$self->_debug_parse_result(
446						$1,
447						'table cell',
448						) if $self->debug;
449
450	620				14878	$self->_span_parser->parse_block($1);
451						}
452						## use critic
453
454						1;
455
456						# ABSTRACT: Block parser for Theory's proposed Markdown extensions
457
458						__END__
459
460						=pod
461
462						=encoding UTF-8
463
464						=head1 NAME
465
466						Markdent::Dialect::Theory::BlockParser - Block parser for Theory's proposed Markdown extensions
467
468						=head1 VERSION
469
470						version 0.39
471
472						=head1 DESCRIPTION
473
474						This role adds parsing for Markdown extensions proposed by David Wheeler (aka
475						Theory). See
476						L<http://justatheory.com/computers/markup/markdown-table-rfc.html> and
477						L<http://justatheory.com/computers/markup/modest-markdown-proposal.html> for
478						details.
479
480						For now, this role handles tables only.
481
482						This role should be applied to L<Markdent::Parser::BlockParser> class or a
483						subclass of that class.
484
485						=head1 ROLES
486
487						This role does the L<Markdent::Role::Dialect::BlockParser> role.
488
489						=head1 BUGS
490
491						See L<Markdent> for bug reporting details.
492
493						Bugs may be submitted at L<https://github.com/houseabsolute/Markdent/issues>.
494
495						I am also usually active on IRC as 'autarch' on C<irc://irc.perl.org>.
496
497						=head1 SOURCE
498
499						The source code repository for Markdent can be found at L<https://github.com/houseabsolute/Markdent>.
500
501						=head1 AUTHOR
502
503						Dave Rolsky <autarch@urth.org>
504
505						=head1 COPYRIGHT AND LICENSE
506
507						This software is copyright (c) 2021 by Dave Rolsky.
508
509						This is free software; you can redistribute it and/or modify it under
510						the same terms as the Perl 5 programming language system itself.
511
512						The full text of the license can be found in the
513						F<LICENSE> file included with this distribution.
514
515						=cut