File Coverage

blib/lib/TAP/Parser/Grammar.pm

Criterion	Covered	Total	%
statement	95	96	98.9
branch	21	22	95.4
condition	9	11	81.8
subroutine	22	22	100.0
pod	5	5	100.0
total	152	156	97.4

line	stmt	bran	cond	sub	pod	time	code
1							package TAP::Parser::Grammar;
2
3	31			31		739	use strict;
	31					46
	31					739
4	31			31		109	use warnings;
	31					38
	31					621
5
6	31			31		11033	use TAP::Parser::ResultFactory ();
	31					62
	31					560
7	31			31		11513	use TAP::Parser::YAMLish::Reader ();
	31					55
	31					728
8
9	31			31		157	use base 'TAP::Object';
	31					34
	31					57106
10
11							=head1 NAME
12
13							TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
14
15							=head1 VERSION
16
17							Version 3.39
18
19							=cut
20
21							our $VERSION = '3.39';
22
23							=head1 SYNOPSIS
24
25							use TAP::Parser::Grammar;
26							my $grammar = $self->make_grammar({
27							iterator => $tap_parser_iterator,
28							parser => $tap_parser,
29							version => 12,
30							});
31
32							my $result = $grammar->tokenize;
33
34							=head1 DESCRIPTION
35
36							C tokenizes lines from a L and
37							constructs L subclasses to represent the tokens.
38
39							Do not attempt to use this class directly. It won't make sense. It's mainly
40							here to ensure that we will be able to have pluggable grammars when TAP is
41							expanded at some future date (plus, this stuff was really cluttering the
42							parser).
43
44							=head1 METHODS
45
46							=head2 Class Methods
47
48							=head3 C
49
50							my $grammar = TAP::Parser::Grammar->new({
51							iterator => $iterator,
52							parser => $parser,
53							version => $version,
54							});
55
56							Returns L grammar object that will parse the TAP stream from the
57							specified iterator. Both C and C are required arguments.
58							If C is not set it defaults to C<12> (see L for more
59							details).
60
61							=cut
62
63							# new() implementation supplied by TAP::Object
64							sub _initialize {
65	283			283		508	my ( $self, $args ) = @_;
66	283					776	$self->{iterator} = $args->{iterator}; # TODO: accessor
67	283		66			753	$self->{iterator} \|\|= $args->{stream}; # deprecated
68	283					528	$self->{parser} = $args->{parser}; # TODO: accessor
69	283		100			3515	$self->set_version( $args->{version} \|\| 12 );
70	283					1561	return $self;
71							}
72
73							my %language_for;
74
75							{
76
77							# XXX the 'not' and 'ok' might be on separate lines in VMS ...
78							my $ok = qr/(?:not )?ok\b/;
79							my $num = qr/\d+/;
80
81							my %v12 = (
82							version => {
83							syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
84							handler => sub {
85							my ( $self, $line ) = @_;
86							my $version = $1;
87							return $self->_make_version_token( $line, $version, );
88							},
89							},
90							plan => {
91							syntax => qr/^1\.\.(\d+)\s(.)\z/,
92							handler => sub {
93							my ( $self, $line ) = @_;
94							my ( $tests_planned, $tail ) = ( $1, $2 );
95							my $explanation = undef;
96							my $skip = '';
97
98							if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
99							my @todo = split /\s+/, _trim($1);
100							return $self->_make_plan_token(
101							$line, $tests_planned, 'TODO',
102							'', \@todo
103							);
104							}
105							elsif ( 0 == $tests_planned ) {
106							$skip = 'SKIP';
107
108							# If we can't match # SKIP the directive should be undef.
109							($explanation) = $tail =~ /^#\sSKIP\S\s+(.*)/i;
110							}
111							elsif ( $tail !~ /^\s*$/ ) {
112							return $self->_make_unknown_token($line);
113							}
114
115							$explanation = '' unless defined $explanation;
116
117							return $self->_make_plan_token(
118							$line, $tests_planned, $skip,
119							$explanation, []
120							);
121
122							},
123							},
124
125							# An optimization to handle the most common test lines without
126							# directives.
127							simple_test => {
128							syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
129							handler => sub {
130							my ( $self, $line ) = @_;
131							my ( $ok, $num, $desc ) = ( $1, $2, $3 );
132
133							return $self->_make_test_token(
134							$line, $ok, $num,
135							$desc
136							);
137							},
138							},
139							test => {
140							syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
141							handler => sub {
142							my ( $self, $line ) = @_;
143							my ( $ok, $num, $desc ) = ( $1, $2, $3 );
144							my ( $dir, $explanation ) = ( '', '' );
145							if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
146							\# \s* (SKIP\|TODO) \b \s* (.*) $/ix
147							)
148							{
149							( $desc, $dir, $explanation ) = ( $1, $2, $3 );
150							}
151							return $self->_make_test_token(
152							$line, $ok, $num, $desc,
153							$dir, $explanation
154							);
155							},
156							},
157							comment => {
158							syntax => qr/^#(.*)/,
159							handler => sub {
160							my ( $self, $line ) = @_;
161							my $comment = $1;
162							return $self->_make_comment_token( $line, $comment );
163							},
164							},
165							bailout => {
166							syntax => qr/^\sBail out!\s(.*)/,
167							handler => sub {
168							my ( $self, $line ) = @_;
169							my $explanation = $1;
170							return $self->_make_bailout_token(
171							$line,
172							$explanation
173							);
174							},
175							},
176							);
177
178							my %v13 = (
179							%v12,
180							plan => {
181							syntax => qr/^1\.\.(\d+)(?:\s#\sSKIP\b(.*))?\z/i,
182							handler => sub {
183							my ( $self, $line ) = @_;
184							my ( $tests_planned, $explanation ) = ( $1, $2 );
185							my $skip
186							= ( 0 == $tests_planned \|\| defined $explanation )
187							? 'SKIP'
188							: '';
189							$explanation = '' unless defined $explanation;
190							return $self->_make_plan_token(
191							$line, $tests_planned, $skip,
192							$explanation, []
193							);
194							},
195							},
196							yaml => {
197							syntax => qr/^ (\s+) (---.*) $/x,
198							handler => sub {
199							my ( $self, $line ) = @_;
200							my ( $pad, $marker ) = ( $1, $2 );
201							return $self->_make_yaml_token( $pad, $marker );
202							},
203							},
204							pragma => {
205							syntax =>
206							qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
207							handler => sub {
208							my ( $self, $line ) = @_;
209							my $pragmas = $1;
210							return $self->_make_pragma_token( $line, $pragmas );
211							},
212							},
213							);
214
215							%language_for = (
216							'12' => {
217							tokens => \%v12,
218							},
219							'13' => {
220							tokens => \%v13,
221							setup => sub {
222							shift->{iterator}->handle_unicode;
223							},
224							},
225							);
226							}
227
228							##############################################################################
229
230							=head2 Instance Methods
231
232							=head3 C
233
234							$grammar->set_version(13);
235
236							Tell the grammar which TAP syntax version to support. The lowest
237							supported version is 12. Although 'TAP version' isn't valid version 12
238							syntax it is accepted so that higher version numbers may be parsed.
239
240							=cut
241
242							sub set_version {
243	310			310	1	380	my $self = shift;
244	310					433	my $version = shift;
245
246	310	100				1255	if ( my $language = $language_for{$version} ) {
247	309					717	$self->{version} = $version;
248	309					786	$self->{tokens} = $language->{tokens};
249
250	309	100				852	if ( my $setup = $language->{setup} ) {
251	23					75	$self->$setup();
252							}
253
254	309					1012	$self->_order_tokens;
255							}
256							else {
257	1					9	require Carp;
258	1					158	Carp::croak("Unsupported syntax version: $version");
259							}
260							}
261
262							# Optimization to put the most frequent tokens first.
263							sub _order_tokens {
264	309			309		414	my $self = shift;
265
266	309					331	my %copy = %{ $self->{tokens} };
	309					2930
267	1236					2216	my @ordered_tokens = grep {defined}
268	309					973	map { delete $copy{$_} } qw( simple_test test comment plan );
	1236					1948
269	309					1212	push @ordered_tokens, values %copy;
270
271	309					1344	$self->{ordered_tokens} = \@ordered_tokens;
272							}
273
274							##############################################################################
275
276							=head3 C
277
278							my $token = $grammar->tokenize;
279
280							This method will return a L object representing the
281							current line of TAP.
282
283							=cut
284
285							sub tokenize {
286	1593			1593	1	1510	my $self = shift;
287
288	1593					5509	my $line = $self->{iterator}->next;
289	1591	100				3035	unless ( defined $line ) {
290	267					572	delete $self->{parser}; # break circular ref
291	267					650	return;
292							}
293
294	1324					1185	my $token;
295
296	1324					1341	for my $token_data ( @{ $self->{ordered_tokens} } ) {
	1324					3247
297	2915	100				19129	if ( $line =~ $token_data->{syntax} ) {
298	1292					1762	my $handler = $token_data->{handler};
299	1292					3429	$token = $self->$handler($line);
300	1292					3716	last;
301							}
302							}
303
304	1324	100				2560	$token = $self->_make_unknown_token($line) unless $token;
305
306	1324					4313	return $self->{parser}->make_result($token);
307							}
308
309							##############################################################################
310
311							=head3 C
312
313							my @types = $grammar->token_types;
314
315							Returns the different types of tokens which this grammar can parse.
316
317							=cut
318
319							sub token_types {
320	2			2	1	441	my $self = shift;
321	2					2	return keys %{ $self->{tokens} };
	2					17
322							}
323
324							##############################################################################
325
326							=head3 C
327
328							my $syntax = $grammar->syntax_for($token_type);
329
330							Returns a pre-compiled regular expression which will match a chunk of TAP
331							corresponding to the token type. For example (not that you should really pay
332							attention to this, C<< $grammar->syntax_for('comment') >> will return
333							C<< qr/^#(.*)/ >>.
334
335							=cut
336
337							sub syntax_for {
338	8			8	1	1575	my ( $self, $type ) = @_;
339	8					31	return $self->{tokens}->{$type}->{syntax};
340							}
341
342							##############################################################################
343
344							=head3 C
345
346							my $handler = $grammar->handler_for($token_type);
347
348							Returns a code reference which, when passed an appropriate line of TAP,
349							returns the lexed token corresponding to that line. As a result, the basic
350							TAP parsing loop looks similar to the following:
351
352							my @tokens;
353							my $grammar = TAP::Grammar->new;
354							LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
355							for my $type ( $grammar->token_types ) {
356							my $syntax = $grammar->syntax_for($type);
357							if ( $line =~ $syntax ) {
358							my $handler = $grammar->handler_for($type);
359							push @tokens => $grammar->$handler($line);
360							next LINE;
361							}
362							}
363							push @tokens => $grammar->_make_unknown_token($line);
364							}
365
366							=cut
367
368							sub handler_for {
369	8			8	1	9	my ( $self, $type ) = @_;
370	8					26	return $self->{tokens}->{$type}->{handler};
371							}
372
373							sub _make_version_token {
374	28			28		62	my ( $self, $line, $version ) = @_;
375							return {
376	28					175	type => 'version',
377							raw => $line,
378							version => $version,
379							};
380							}
381
382							sub _make_plan_token {
383	266			266		650	my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
384
385	266	100	100			1182	if ( $directive eq 'SKIP'
			66
386							&& 0 != $tests_planned
387							&& $self->{version} < 13 )
388							{
389	1					9	warn
390							"Specified SKIP directive in plan but more than 0 tests ($line)\n";
391							}
392
393							return {
394	266					1000	type => 'plan',
395							raw => $line,
396							tests_planned => $tests_planned,
397							directive => $directive,
398							explanation => _trim($explanation),
399							todo_list => $todo,
400							};
401							}
402
403							sub _make_test_token {
404	865			865		1605	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
405							return {
406	865	100				3545	ok => $ok,
		100
407
408							# forcing this to be an integer (and not a string) reduces memory
409							# consumption. RT #84939
410							test_num => ( defined $num ? 0 + $num : undef ),
411							description => _trim($desc),
412							directive => ( defined $dir ? uc $dir : '' ),
413							explanation => _trim($explanation),
414							raw => $line,
415							type => 'test',
416							};
417							}
418
419							sub _make_unknown_token {
420	32			32		73	my ( $self, $line ) = @_;
421							return {
422	32					114	raw => $line,
423							type => 'unknown',
424							};
425							}
426
427							sub _make_comment_token {
428	111			111		172	my ( $self, $line, $comment ) = @_;
429							return {
430	111					308	type => 'comment',
431							raw => $line,
432							comment => _trim($comment)
433							};
434							}
435
436							sub _make_bailout_token {
437	9			9		22	my ( $self, $line, $explanation ) = @_;
438							return {
439	9					29	type => 'bailout',
440							raw => $line,
441							bailout => _trim($explanation)
442							};
443							}
444
445							sub _make_yaml_token {
446	10			10		23	my ( $self, $pad, $marker ) = @_;
447
448	10					86	my $yaml = TAP::Parser::YAMLish::Reader->new;
449
450	10					24	my $iterator = $self->{iterator};
451
452							# Construct a reader that reads from our input stripping leading
453							# spaces from each line.
454	10					23	my $leader = length($pad);
455	10					190	my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
456	10					27	my @extra = ($marker);
457							my $reader = sub {
458	50	100		50		116	return shift @extra if @extra;
459	40					108	my $line = $iterator->next;
460	40	50				377	return $2 if $line =~ $strip;
461	0					0	return;
462	10					51	};
463
464	10					59	my $data = $yaml->read($reader);
465
466							# Reconstitute input. This is convoluted. Maybe we should just
467							# record it on the way in...
468	10					33	chomp( my $raw = $yaml->get_raw );
469	10					114	$raw =~ s/^/$pad/mg;
470
471							return {
472	10					118	type => 'yaml',
473							raw => $raw,
474							data => $data
475							};
476							}
477
478							sub _make_pragma_token {
479	7			7		13	my ( $self, $line, $pragmas ) = @_;
480							return {
481	7					22	type => 'pragma',
482							raw => $line,
483							pragmas => [ split /\s,\s/, _trim($pragmas) ],
484							};
485							}
486
487							sub _trim {
488	2125			2125		2270	my $data = shift;
489
490	2125	100				8910	return '' unless defined $data;
491
492	932					2101	$data =~ s/^\s+//;
493	932					1857	$data =~ s/\s+$//;
494	932					5563	return $data;
495							}
496
497							1;
498
499							=head1 TAP GRAMMAR
500
501							B This grammar is slightly out of date. There's still some discussion
502							about it and a new one will be provided when we have things better defined.
503
504							The L does not use a formal grammar because TAP is essentially a
505							stream-based protocol. In fact, it's quite legal to have an infinite stream.
506							For the same reason that we don't apply regexes to streams, we're not using a
507							formal grammar here. Instead, we parse the TAP in lines.
508
509							For purposes for forward compatibility, any result which does not match the
510							following grammar is currently referred to as
511							L. It is I a parse error.
512
513							A formal grammar would look similar to the following:
514
515							(*
516							For the time being, I'm cheating on the EBNF by allowing
517							certain terms to be defined by POSIX character classes by
518							using the following syntax:
519
520							digit ::= [:digit:]
521
522							As far as I am aware, that's not valid EBNF. Sue me. I
523							didn't know how to write "char" otherwise (Unicode issues).
524							Suggestions welcome.
525							*)
526
527							tap ::= version? { comment \| unknown } leading_plan lines
528							\|
529							lines trailing_plan {comment}
530
531							version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
532
533							leading_plan ::= plan skip_directive? "\n"
534
535							trailing_plan ::= plan "\n"
536
537							plan ::= '1..' nonNegativeInteger
538
539							lines ::= line {line}
540
541							line ::= (comment \| test \| unknown \| bailout ) "\n"
542
543							test ::= status positiveInteger? description? directive?
544
545							status ::= 'not '? 'ok '
546
547							description ::= (character - (digit \| '#')) {character - '#'}
548
549							directive ::= todo_directive \| skip_directive
550
551							todo_directive ::= hash_mark 'TODO' ' ' {character}
552
553							skip_directive ::= hash_mark 'SKIP' ' ' {character}
554
555							comment ::= hash_mark {character}
556
557							hash_mark ::= '#' {' '}
558
559							bailout ::= 'Bail out!' {character}
560
561							unknown ::= { (character - "\n") }
562
563							(* POSIX character classes and other terminals *)
564
565							digit ::= [:digit:]
566							character ::= ([:print:] - "\n")
567							positiveInteger ::= ( digit - '0' ) {digit}
568							nonNegativeInteger ::= digit {digit}
569
570							=head1 SUBCLASSING
571
572							Please see L for a subclassing overview.
573
574							If you I want to subclass L's grammar the best thing to
575							do is read through the code. There's no easy way of summarizing it here.
576
577							=head1 SEE ALSO
578
579							L,
580							L,
581							L,
582							L,
583
584							=cut