File Coverage

blib/lib/TAPx/Parser/Grammar.pm

Criterion	Covered	Total	%
statement	45	46	97.8
branch	8	10	80.0
condition	9	13	69.2
subroutine	14	14	100.0
pod	5	5	100.0
total	81	88	92.0

line	stmt	bran	cond	sub	pod	time	code
1							package TAPx::Parser::Grammar;
2
3	12			12		10128	use strict;
	12					31
	12					803
4	12			12		67	use vars qw($VERSION);
	12					26
	12					521
5
6	12			12		5513	use TAPx::Parser::Result;
	12					32
	12					14934
7
8							=head1 NAME
9
10							TAPx::Parser::Grammar - A grammar for the original TAP version.
11
12							=head1 VERSION
13
14							Version 0.50_07
15
16							=cut
17
18							$VERSION = '0.50_07';
19
20							=head1 DESCRIPTION
21
22							C is actually just a means for identifying individual
23							chunks (usually lines) of TAP.
24
25							Do not attempt to use this class directly. It won't make sense. It's mainly
26							here to ensure that we will be able to have pluggable grammars when TAP is
27							expanded at some future date (plus, this stuff was really cluttering the
28							parser).
29
30							Note that currently all methods are class methods. It's intended that this
31							will eventually support C and beyond which will necessitate actual
32							instance data, but for now, we don't need this. Hence, the curious decision
33							to use a class where one doesn't apparently need one.
34
35							=cut
36
37							##############################################################################
38
39							=head2 Class Methods
40
41
42							=head3 C
43
44							my $grammar = TAPx::Grammar->new;
45
46							Returns TAP grammar object. Future versions may accept a version number.
47
48							=cut
49
50							sub new {
51	67			67	1	1377	my ($class) = @_;
52	67					607	bless {}, $class;
53							}
54
55							# XXX the 'not' and 'ok' might be on separate lines in VMS ...
56							my $ok = qr/(?:not )?ok\b/;
57							my $num = qr/\d+/;
58
59							# description is any which is not followed by an odd number of escapes
60							# following by '#': \\\# \#
61							my $description = qr/.?(?!\$?:\\\$)#?/;
62
63							# if we have an even number of escapes in front of the '#', assert that it
64							# does not have an escape in front of it (this gets around the 'no variable
65							# length lookbehind assertions')
66							my $directive = qr/
67							(?
68							(?i:
69							\#\s+
70							(TODO\|SKIP)\b
71							(.*)
72							)?
73							/x;
74
75							my %token_for = (
76							plan => {
77							syntax => qr/^1\.\.(\d+)(?:\s#\sSKIP\b(.*))?\z/i,
78							handler => sub {
79							my ( $self, $line ) = @_;
80							local *__ANON__ = '__ANON__plan_token_handler';
81							my $tests_planned = $1;
82							my $explanation = $2;
83							my $skip =
84							( 0 == $tests_planned \|\| defined $explanation )
85							? 'SKIP'
86							: '';
87							$explanation = '' unless defined $explanation;
88							return $self->_make_plan_token(
89							$line,
90							$tests_planned,
91							$skip,
92							_trim($explanation),
93							);
94							},
95							},
96							test => {
97							syntax => qr/^
98							($ok)
99							\s*
100							($num)?
101							\s*
102							($description)?
103							$directive # $4 = directive, $5 = explanation
104							\z/x,
105							handler => sub {
106							my ( $self, $line ) = @_;
107							local *__ANON__ = '__ANON__test_token_handler';
108							my ( $ok, $num, $desc, $dir, $explanation )
109							= ( $1, $2, $3, $4, $5 );
110							return $self->_make_test_token(
111							$line,
112							$ok,
113							$num,
114							$desc,
115							uc $dir,
116							$explanation
117							);
118							},
119							},
120							comment => {
121							syntax => qr/^#(.*)/,
122							handler => sub {
123							my ( $self, $line ) = @_;
124							local *__ANON__ = '__ANON__comment_token_handler';
125							my $comment = $1;
126							return $self->_make_comment_token( $line, $comment );
127							},
128							},
129							bailout => {
130							syntax => qr/^Bail out!\s(.)/,
131							handler => sub {
132							my ( $self, $line ) = @_;
133							local *__ANON__ = '__ANON__bailout_token_handler';
134							my $explanation = $1;
135							return $self->_make_bailout_token( $line, _trim($explanation) );
136							},
137							},
138							);
139
140							##############################################################################
141
142							=head3 C
143
144							my $token = $grammar->tokenize($string);
145
146							Passed a line of TAP, this method will return a data structure representing a
147							'token' matching that line of TAP input. Designed to be passed to
148							C to create a result object.
149
150							This is really the only method you need to worry about for the grammar. The
151							methods below are merely for convenience, if needed.
152
153							=cut
154
155							sub tokenize {
156	365			365	1	698	my $self = shift;
157	365	100	66			2566	return unless @_ && defined $_[0];
158
159	361					652	my $line = shift;
160	361					555	my $token;
161
162	361					1567	foreach my $token_data ( values %token_for ) {
163	697	100				10028	if ( $line =~ $token_data->{syntax} ) {
164	348					741	my $handler = $token_data->{handler};
165	348					1509	$token = $self->$handler($line);
166	348					1248	last;
167							}
168							}
169	361		66			1275	$token \|\|= $self->_make_unknown_token($line);
170	361	50				2783	return defined $token ? TAPx::Parser::Result->new($token) : ();
171							}
172
173							##############################################################################
174
175							=head2 Class methods
176
177							=head3 C
178
179							my @types = $grammar->token_types;
180
181							Returns the different types of tokens which this grammar can parse.
182
183							=cut
184
185	1			1	1	13	sub token_types { keys %token_for }
186
187							##############################################################################
188
189							=head3 C
190
191							my $syntax = $grammar->syntax_for($token_type);
192
193							Returns a pre-compiled regular expression which will match a chunk of TAP
194							corresponding to the token type. For example (not that you should really pay
195							attention to this, C<< $grammar->syntax_for('comment') >> will return
196							C<< qr/^#(.*)/ >>.
197
198							=cut
199
200							sub syntax_for {
201	4			4	1	1637	my ( $proto, $type ) = @_;
202	4					27	return $token_for{$type}{syntax};
203							}
204
205							##############################################################################
206
207							=head3 C
208
209							my $handler = $grammar->handler_for($token_type);
210
211							Returns a code reference which, when passed an appropriate line of TAP,
212							returns the lexed token corresponding to that line. As a result, the basic
213							TAP parsing loop looks similar to the following:
214
215							my @tokens;
216							my $grammar = TAPx::Grammar->new;
217							LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
218							foreach my $type ( $grammar->token_types ) {
219							my $syntax = $grammar->syntax_for($type);
220							if ( $line =~ $syntax ) {
221							my $handler = $grammar->handler_for($type);
222							push @tokens => $grammar->$handler($line);
223							next LINE;
224							}
225							}
226							push @tokens => $grammar->_make_unknown_token($line);
227							}
228
229							=cut
230
231							sub handler_for {
232	4			4	1	6	my ( $proto, $type ) = @_;
233	4					26	return $token_for{$type}{handler};
234							}
235
236							sub _make_plan_token {
237	69			69		265	my ( $self, $line, $tests_planned, $skip, $explanation ) = @_;
238	69	100				222	if ( 0 == $tests_planned ) {
239	4		50			38	$skip \|\|= 'SKIP';
240							}
241	69	50	66			431	if ( $skip && 0 != $tests_planned ) {
242	0					0	warn
243							"Specified SKIP directive in plan but more than 0 tests ($line)\n";
244							}
245							return {
246	69					1448	type => 'plan',
247							raw => $line,
248							tests_planned => $tests_planned,
249							directive => $skip,
250							explanation => $explanation,
251							};
252							}
253
254							sub _make_test_token {
255	243			243		8164	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
256	243					818	my %test = (
257							ok => $ok,
258							test_num => $num,
259							description => _trim($desc),
260							directive => uc($dir),
261							explanation => _trim($explanation),
262							raw => $line,
263							type => 'test',
264							);
265	243					1186	return \%test;
266							}
267
268							sub _make_unknown_token {
269	13			13		48	my ( $self, $line ) = @_;
270							return {
271	13					113	raw => $line,
272							type => 'unknown',
273							};
274							}
275
276							sub _make_comment_token {
277	34			34		141	my ( $self, $line, $comment ) = @_;
278							return {
279	34					137	type => 'comment',
280							raw => $line,
281							comment => _trim($1)
282							};
283							}
284
285							sub _make_bailout_token {
286	5			5		28	my ( $self, $line, $explanation ) = @_;
287							return {
288	5					15	type => 'bailout',
289							raw => $line,
290							bailout => _trim($1)
291							};
292							}
293
294							sub _trim {
295	599		100	599		2572	my $data = shift \|\| '';
296	599					1386	$data =~ s/^\s+//;
297	599					1368	$data =~ s/\s+$//;
298	599					4581	return $data;
299							}
300
301							=head1 TAP GRAMMAR
302
303							B This grammar is slightly out of date. There's still some discussion
304							about it and a new one will be provided when we have things better defined.
305
306							The C does not use a formal grammar because TAP is essentially a
307							stream-based protocol. In fact, it's quite legal to have an infinite stream.
308							For the same reason that we don't apply regexes to streams, we're not using a
309							formal grammar here. Instead, we parse the TAP in lines.
310
311							For purposes for forward compatability, any result which does not match the
312							following grammar is currently referred to as
313							L. It is I a parse error.
314
315							A formal grammar would look similar to the following:
316
317							(*
318							For the time being, I'm cheating on the EBNF by allowing
319							certain terms to be defined by POSIX character classes by
320							using the following syntax:
321
322							digit ::= [:digit:]
323
324							As far as I am aware, that's not valid EBNF. Sue me. I
325							didn't know how to write "char" otherwise (Unicode issues).
326							Suggestions welcome.
327							*)
328
329							(* POSIX character classes and other terminals *)
330
331							digit ::= [:digit:]
332							character ::= ([:print:] - "\n")
333							positiveInteger ::= ( digit - '0' ) {digit}
334							nonNegativeInteger ::= digit {digit}
335
336							tap ::= { comment \| unknown } leading_plan lines
337							\|
338							lines trailing_plan {comment}
339
340							leading_plan ::= plan skip_directive? "\n"
341
342							trailing_plan ::= plan "\n"
343
344							plan ::= '1..' nonNegativeInteger
345
346							lines ::= line {line}
347
348							line ::= (comment \| test \| unknown \| bailout ) "\n"
349
350							test ::= status positiveInteger? description? directive?
351
352							status ::= 'not '? 'ok '
353
354							description ::= (character - (digit \| '#')) {character - '#'}
355
356							directive ::= todo_directive \| skip_directive
357
358							todo_directive ::= hash_mark 'TODO' ' ' {character}
359
360							skip_directive ::= hash_mark 'SKIP' ' ' {character}
361
362							comment ::= hash_mark {character}
363
364							hash_mark ::= '#' {' '}
365
366							bailout ::= 'Bail out!' {character}
367
368							unknown ::= { (character - "\n") }
369
370							=cut
371
372							1;