File Coverage

blib/lib/TAP/Parser/Grammar.pm

Criterion	Covered	Total	%
statement	95	96	98.9
branch	21	22	95.4
condition	9	11	81.8
subroutine	22	22	100.0
pod	5	5	100.0
total	152	156	97.4

line	stmt	bran	cond	sub	pod	time	code
1							package TAP::Parser::Grammar;
2
3	31			31		755	use strict;
	31					45
	31					744
4	31			31		114	use warnings;
	31					36
	31					671
5
6	31			31		11606	use TAP::Parser::ResultFactory ();
	31					82
	31					496
7	31			31		11033	use TAP::Parser::YAMLish::Reader ();
	31					58
	31					692
8
9	31			31		169	use base 'TAP::Object';
	31					32
	31					56628
10
11							=head1 NAME
12
13							TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
14
15							=head1 VERSION
16
17							Version 3.38
18
19							=cut
20
21							our $VERSION = '3.38';
22
23							=head1 SYNOPSIS
24
25							use TAP::Parser::Grammar;
26							my $grammar = $self->make_grammar({
27							iterator => $tap_parser_iterator,
28							parser => $tap_parser,
29							version => 12,
30							});
31
32							my $result = $grammar->tokenize;
33
34							=head1 DESCRIPTION
35
36							C tokenizes lines from a L and
37							constructs L subclasses to represent the tokens.
38
39							Do not attempt to use this class directly. It won't make sense. It's mainly
40							here to ensure that we will be able to have pluggable grammars when TAP is
41							expanded at some future date (plus, this stuff was really cluttering the
42							parser).
43
44							=head1 METHODS
45
46							=head2 Class Methods
47
48							=head3 C
49
50							my $grammar = TAP::Parser::Grammar->new({
51							iterator => $iterator,
52							parser => $parser,
53							version => $version,
54							});
55
56							Returns L grammar object that will parse the TAP stream from the
57							specified iterator. Both C and C are required arguments.
58							If C is not set it defaults to C<12> (see L for more
59							details).
60
61							=cut
62
63							# new() implementation supplied by TAP::Object
64							sub _initialize {
65	283			283		474	my ( $self, $args ) = @_;
66	283					700	$self->{iterator} = $args->{iterator}; # TODO: accessor
67	283		66			806	$self->{iterator} \|\|= $args->{stream}; # deprecated
68	283					486	$self->{parser} = $args->{parser}; # TODO: accessor
69	283		100			3376	$self->set_version( $args->{version} \|\| 12 );
70	283					1511	return $self;
71							}
72
73							my %language_for;
74
75							{
76
77							# XXX the 'not' and 'ok' might be on separate lines in VMS ...
78							my $ok = qr/(?:not )?ok\b/;
79							my $num = qr/\d+/;
80
81							my %v12 = (
82							version => {
83							syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
84							handler => sub {
85							my ( $self, $line ) = @_;
86							my $version = $1;
87							return $self->_make_version_token( $line, $version, );
88							},
89							},
90							plan => {
91							syntax => qr/^1\.\.(\d+)\s(.)\z/,
92							handler => sub {
93							my ( $self, $line ) = @_;
94							my ( $tests_planned, $tail ) = ( $1, $2 );
95							my $explanation = undef;
96							my $skip = '';
97
98							if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
99							my @todo = split /\s+/, _trim($1);
100							return $self->_make_plan_token(
101							$line, $tests_planned, 'TODO',
102							'', \@todo
103							);
104							}
105							elsif ( 0 == $tests_planned ) {
106							$skip = 'SKIP';
107
108							# If we can't match # SKIP the directive should be undef.
109							($explanation) = $tail =~ /^#\sSKIP\S\s+(.*)/i;
110							}
111							elsif ( $tail !~ /^\s*$/ ) {
112							return $self->_make_unknown_token($line);
113							}
114
115							$explanation = '' unless defined $explanation;
116
117							return $self->_make_plan_token(
118							$line, $tests_planned, $skip,
119							$explanation, []
120							);
121
122							},
123							},
124
125							# An optimization to handle the most common test lines without
126							# directives.
127							simple_test => {
128							syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
129							handler => sub {
130							my ( $self, $line ) = @_;
131							my ( $ok, $num, $desc ) = ( $1, $2, $3 );
132
133							return $self->_make_test_token(
134							$line, $ok, $num,
135							$desc
136							);
137							},
138							},
139							test => {
140							syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
141							handler => sub {
142							my ( $self, $line ) = @_;
143							my ( $ok, $num, $desc ) = ( $1, $2, $3 );
144							my ( $dir, $explanation ) = ( '', '' );
145							if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
146							\# \s* (SKIP\|TODO) \b \s* (.*) $/ix
147							)
148							{
149							( $desc, $dir, $explanation ) = ( $1, $2, $3 );
150							}
151							return $self->_make_test_token(
152							$line, $ok, $num, $desc,
153							$dir, $explanation
154							);
155							},
156							},
157							comment => {
158							syntax => qr/^#(.*)/,
159							handler => sub {
160							my ( $self, $line ) = @_;
161							my $comment = $1;
162							return $self->_make_comment_token( $line, $comment );
163							},
164							},
165							bailout => {
166							syntax => qr/^\sBail out!\s(.*)/,
167							handler => sub {
168							my ( $self, $line ) = @_;
169							my $explanation = $1;
170							return $self->_make_bailout_token(
171							$line,
172							$explanation
173							);
174							},
175							},
176							);
177
178							my %v13 = (
179							%v12,
180							plan => {
181							syntax => qr/^1\.\.(\d+)(?:\s#\sSKIP\b(.*))?\z/i,
182							handler => sub {
183							my ( $self, $line ) = @_;
184							my ( $tests_planned, $explanation ) = ( $1, $2 );
185							my $skip
186							= ( 0 == $tests_planned \|\| defined $explanation )
187							? 'SKIP'
188							: '';
189							$explanation = '' unless defined $explanation;
190							return $self->_make_plan_token(
191							$line, $tests_planned, $skip,
192							$explanation, []
193							);
194							},
195							},
196							yaml => {
197							syntax => qr/^ (\s+) (---.*) $/x,
198							handler => sub {
199							my ( $self, $line ) = @_;
200							my ( $pad, $marker ) = ( $1, $2 );
201							return $self->_make_yaml_token( $pad, $marker );
202							},
203							},
204							pragma => {
205							syntax =>
206							qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
207							handler => sub {
208							my ( $self, $line ) = @_;
209							my $pragmas = $1;
210							return $self->_make_pragma_token( $line, $pragmas );
211							},
212							},
213							);
214
215							%language_for = (
216							'12' => {
217							tokens => \%v12,
218							},
219							'13' => {
220							tokens => \%v13,
221							setup => sub {
222							shift->{iterator}->handle_unicode;
223							},
224							},
225							);
226							}
227
228							##############################################################################
229
230							=head2 Instance Methods
231
232							=head3 C
233
234							$grammar->set_version(13);
235
236							Tell the grammar which TAP syntax version to support. The lowest
237							supported version is 12. Although 'TAP version' isn't valid version 12
238							syntax it is accepted so that higher version numbers may be parsed.
239
240							=cut
241
242							sub set_version {
243	310			310	1	496	my $self = shift;
244	310					459	my $version = shift;
245
246	310	100				1332	if ( my $language = $language_for{$version} ) {
247	309					747	$self->{version} = $version;
248	309					767	$self->{tokens} = $language->{tokens};
249
250	309	100				798	if ( my $setup = $language->{setup} ) {
251	23					101	$self->$setup();
252							}
253
254	309					948	$self->_order_tokens;
255							}
256							else {
257	1					7	require Carp;
258	1					156	Carp::croak("Unsupported syntax version: $version");
259							}
260							}
261
262							# Optimization to put the most frequent tokens first.
263							sub _order_tokens {
264	309			309		377	my $self = shift;
265
266	309					488	my %copy = %{ $self->{tokens} };
	309					3071
267	1236					3325	my @ordered_tokens = grep {defined}
268	309					932	map { delete $copy{$_} } qw( simple_test test comment plan );
	1236					1922
269	309					897	push @ordered_tokens, values %copy;
270
271	309					1368	$self->{ordered_tokens} = \@ordered_tokens;
272							}
273
274							##############################################################################
275
276							=head3 C
277
278							my $token = $grammar->tokenize;
279
280							This method will return a L object representing the
281							current line of TAP.
282
283							=cut
284
285							sub tokenize {
286	1593			1593	1	2081	my $self = shift;
287
288	1593					6021	my $line = $self->{iterator}->next;
289	1591	100				3180	unless ( defined $line ) {
290	267					505	delete $self->{parser}; # break circular ref
291	267					801	return;
292							}
293
294	1324					1274	my $token;
295
296	1324					1180	for my $token_data ( @{ $self->{ordered_tokens} } ) {
	1324					3065
297	2910	100				21953	if ( $line =~ $token_data->{syntax} ) {
298	1292					1753	my $handler = $token_data->{handler};
299	1292					3173	$token = $self->$handler($line);
300	1292					3484	last;
301							}
302							}
303
304	1324	100				3213	$token = $self->_make_unknown_token($line) unless $token;
305
306	1324					4638	return $self->{parser}->make_result($token);
307							}
308
309							##############################################################################
310
311							=head3 C
312
313							my @types = $grammar->token_types;
314
315							Returns the different types of tokens which this grammar can parse.
316
317							=cut
318
319							sub token_types {
320	2			2	1	638	my $self = shift;
321	2					3	return keys %{ $self->{tokens} };
	2					18
322							}
323
324							##############################################################################
325
326							=head3 C
327
328							my $syntax = $grammar->syntax_for($token_type);
329
330							Returns a pre-compiled regular expression which will match a chunk of TAP
331							corresponding to the token type. For example (not that you should really pay
332							attention to this, C<< $grammar->syntax_for('comment') >> will return
333							C<< qr/^#(.*)/ >>.
334
335							=cut
336
337							sub syntax_for {
338	8			8	1	2432	my ( $self, $type ) = @_;
339	8					31	return $self->{tokens}->{$type}->{syntax};
340							}
341
342							##############################################################################
343
344							=head3 C
345
346							my $handler = $grammar->handler_for($token_type);
347
348							Returns a code reference which, when passed an appropriate line of TAP,
349							returns the lexed token corresponding to that line. As a result, the basic
350							TAP parsing loop looks similar to the following:
351
352							my @tokens;
353							my $grammar = TAP::Grammar->new;
354							LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
355							for my $type ( $grammar->token_types ) {
356							my $syntax = $grammar->syntax_for($type);
357							if ( $line =~ $syntax ) {
358							my $handler = $grammar->handler_for($type);
359							push @tokens => $grammar->$handler($line);
360							next LINE;
361							}
362							}
363							push @tokens => $grammar->_make_unknown_token($line);
364							}
365
366							=cut
367
368							sub handler_for {
369	8			8	1	13	my ( $self, $type ) = @_;
370	8					28	return $self->{tokens}->{$type}->{handler};
371							}
372
373							sub _make_version_token {
374	28			28		60	my ( $self, $line, $version ) = @_;
375							return {
376	28					195	type => 'version',
377							raw => $line,
378							version => $version,
379							};
380							}
381
382							sub _make_plan_token {
383	266			266		645	my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
384
385	266	100	100			1122	if ( $directive eq 'SKIP'
			66
386							&& 0 != $tests_planned
387							&& $self->{version} < 13 )
388							{
389	1					10	warn
390							"Specified SKIP directive in plan but more than 0 tests ($line)\n";
391							}
392
393							return {
394	266					1076	type => 'plan',
395							raw => $line,
396							tests_planned => $tests_planned,
397							directive => $directive,
398							explanation => _trim($explanation),
399							todo_list => $todo,
400							};
401							}
402
403							sub _make_test_token {
404	865			865		1801	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
405							return {
406	865	100				3601	ok => $ok,
		100
407
408							# forcing this to be an integer (and not a string) reduces memory
409							# consumption. RT #84939
410							test_num => ( defined $num ? 0 + $num : undef ),
411							description => _trim($desc),
412							directive => ( defined $dir ? uc $dir : '' ),
413							explanation => _trim($explanation),
414							raw => $line,
415							type => 'test',
416							};
417							}
418
419							sub _make_unknown_token {
420	32			32		79	my ( $self, $line ) = @_;
421							return {
422	32					154	raw => $line,
423							type => 'unknown',
424							};
425							}
426
427							sub _make_comment_token {
428	111			111		165	my ( $self, $line, $comment ) = @_;
429							return {
430	111					252	type => 'comment',
431							raw => $line,
432							comment => _trim($comment)
433							};
434							}
435
436							sub _make_bailout_token {
437	9			9		23	my ( $self, $line, $explanation ) = @_;
438							return {
439	9					28	type => 'bailout',
440							raw => $line,
441							bailout => _trim($explanation)
442							};
443							}
444
445							sub _make_yaml_token {
446	10			10		30	my ( $self, $pad, $marker ) = @_;
447
448	10					104	my $yaml = TAP::Parser::YAMLish::Reader->new;
449
450	10					21	my $iterator = $self->{iterator};
451
452							# Construct a reader that reads from our input stripping leading
453							# spaces from each line.
454	10					76	my $leader = length($pad);
455	10					120	my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
456	10					67	my @extra = ($marker);
457							my $reader = sub {
458	50	100		50		116	return shift @extra if @extra;
459	40					99	my $line = $iterator->next;
460	40	50				478	return $2 if $line =~ $strip;
461	0					0	return;
462	10					60	};
463
464	10					69	my $data = $yaml->read($reader);
465
466							# Reconstitute input. This is convoluted. Maybe we should just
467							# record it on the way in...
468	10					33	chomp( my $raw = $yaml->get_raw );
469	10					87	$raw =~ s/^/$pad/mg;
470
471							return {
472	10					129	type => 'yaml',
473							raw => $raw,
474							data => $data
475							};
476							}
477
478							sub _make_pragma_token {
479	7			7		18	my ( $self, $line, $pragmas ) = @_;
480							return {
481	7					26	type => 'pragma',
482							raw => $line,
483							pragmas => [ split /\s,\s/, _trim($pragmas) ],
484							};
485							}
486
487							sub _trim {
488	2125			2125		2361	my $data = shift;
489
490	2125	100				9222	return '' unless defined $data;
491
492	932					1990	$data =~ s/^\s+//;
493	932					4152	$data =~ s/\s+$//;
494	932					6026	return $data;
495							}
496
497							1;
498
499							=head1 TAP GRAMMAR
500
501							B This grammar is slightly out of date. There's still some discussion
502							about it and a new one will be provided when we have things better defined.
503
504							The L does not use a formal grammar because TAP is essentially a
505							stream-based protocol. In fact, it's quite legal to have an infinite stream.
506							For the same reason that we don't apply regexes to streams, we're not using a
507							formal grammar here. Instead, we parse the TAP in lines.
508
509							For purposes for forward compatibility, any result which does not match the
510							following grammar is currently referred to as
511							L. It is I a parse error.
512
513							A formal grammar would look similar to the following:
514
515							(*
516							For the time being, I'm cheating on the EBNF by allowing
517							certain terms to be defined by POSIX character classes by
518							using the following syntax:
519
520							digit ::= [:digit:]
521
522							As far as I am aware, that's not valid EBNF. Sue me. I
523							didn't know how to write "char" otherwise (Unicode issues).
524							Suggestions welcome.
525							*)
526
527							tap ::= version? { comment \| unknown } leading_plan lines
528							\|
529							lines trailing_plan {comment}
530
531							version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
532
533							leading_plan ::= plan skip_directive? "\n"
534
535							trailing_plan ::= plan "\n"
536
537							plan ::= '1..' nonNegativeInteger
538
539							lines ::= line {line}
540
541							line ::= (comment \| test \| unknown \| bailout ) "\n"
542
543							test ::= status positiveInteger? description? directive?
544
545							status ::= 'not '? 'ok '
546
547							description ::= (character - (digit \| '#')) {character - '#'}
548
549							directive ::= todo_directive \| skip_directive
550
551							todo_directive ::= hash_mark 'TODO' ' ' {character}
552
553							skip_directive ::= hash_mark 'SKIP' ' ' {character}
554
555							comment ::= hash_mark {character}
556
557							hash_mark ::= '#' {' '}
558
559							bailout ::= 'Bail out!' {character}
560
561							unknown ::= { (character - "\n") }
562
563							(* POSIX character classes and other terminals *)
564
565							digit ::= [:digit:]
566							character ::= ([:print:] - "\n")
567							positiveInteger ::= ( digit - '0' ) {digit}
568							nonNegativeInteger ::= digit {digit}
569
570							=head1 SUBCLASSING
571
572							Please see L for a subclassing overview.
573
574							If you I want to subclass L's grammar the best thing to
575							do is read through the code. There's no easy way of summarizing it here.
576
577							=head1 SEE ALSO
578
579							L,
580							L,
581							L,
582							L,
583
584							=cut