File Coverage

blib/lib/TAPx/Parser/Grammar.pm
Criterion Covered Total %
statement 45 46 97.8
branch 8 10 80.0
condition 9 13 69.2
subroutine 14 14 100.0
pod 5 5 100.0
total 81 88 92.0


line stmt bran cond sub pod time code
1             package TAPx::Parser::Grammar;
2              
3 12     12   10128 use strict;
  12         31  
  12         803  
4 12     12   67 use vars qw($VERSION);
  12         26  
  12         521  
5              
6 12     12   5513 use TAPx::Parser::Result;
  12         32  
  12         14934  
7              
8             =head1 NAME
9              
10             TAPx::Parser::Grammar - A grammar for the original TAP version.
11              
12             =head1 VERSION
13              
14             Version 0.50_07
15              
16             =cut
17              
18             $VERSION = '0.50_07';
19              
20             =head1 DESCRIPTION
21              
22             C is actually just a means for identifying individual
23             chunks (usually lines) of TAP.
24              
25             Do not attempt to use this class directly. It won't make sense. It's mainly
26             here to ensure that we will be able to have pluggable grammars when TAP is
27             expanded at some future date (plus, this stuff was really cluttering the
28             parser).
29              
30             Note that currently all methods are class methods. It's intended that this
31             will eventually support C and beyond which will necessitate actual
32             instance data, but for now, we don't need this. Hence, the curious decision
33             to use a class where one doesn't apparently need one.
34              
35             =cut
36              
37             ##############################################################################
38              
39             =head2 Class Methods
40              
41              
42             =head3 C
43              
44             my $grammar = TAPx::Grammar->new;
45              
46             Returns TAP grammar object. Future versions may accept a version number.
47              
48             =cut
49              
50             sub new {
51 67     67 1 1377 my ($class) = @_;
52 67         607 bless {}, $class;
53             }
54              
55             # XXX the 'not' and 'ok' might be on separate lines in VMS ...
56             my $ok = qr/(?:not )?ok\b/;
57             my $num = qr/\d+/;
58              
59             # description is *any* which is not followed by an odd number of escapes
60             # following by '#': \\\# \#
61             my $description = qr/.*?(?!\\(?:\\\\)*)#?/;
62              
63             # if we have an even number of escapes in front of the '#', assert that it
64             # does not have an escape in front of it (this gets around the 'no variable
65             # length lookbehind assertions')
66             my $directive = qr/
67             (?
68             (?i:
69             \#\s+
70             (TODO|SKIP)\b
71             (.*)
72             )?
73             /x;
74              
75             my %token_for = (
76             plan => {
77             syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
78             handler => sub {
79             my ( $self, $line ) = @_;
80             local *__ANON__ = '__ANON__plan_token_handler';
81             my $tests_planned = $1;
82             my $explanation = $2;
83             my $skip =
84             ( 0 == $tests_planned || defined $explanation )
85             ? 'SKIP'
86             : '';
87             $explanation = '' unless defined $explanation;
88             return $self->_make_plan_token(
89             $line,
90             $tests_planned,
91             $skip,
92             _trim($explanation),
93             );
94             },
95             },
96             test => {
97             syntax => qr/^
98             ($ok)
99             \s*
100             ($num)?
101             \s*
102             ($description)?
103             $directive # $4 = directive, $5 = explanation
104             \z/x,
105             handler => sub {
106             my ( $self, $line ) = @_;
107             local *__ANON__ = '__ANON__test_token_handler';
108             my ( $ok, $num, $desc, $dir, $explanation )
109             = ( $1, $2, $3, $4, $5 );
110             return $self->_make_test_token(
111             $line,
112             $ok,
113             $num,
114             $desc,
115             uc $dir,
116             $explanation
117             );
118             },
119             },
120             comment => {
121             syntax => qr/^#(.*)/,
122             handler => sub {
123             my ( $self, $line ) = @_;
124             local *__ANON__ = '__ANON__comment_token_handler';
125             my $comment = $1;
126             return $self->_make_comment_token( $line, $comment );
127             },
128             },
129             bailout => {
130             syntax => qr/^Bail out!\s*(.*)/,
131             handler => sub {
132             my ( $self, $line ) = @_;
133             local *__ANON__ = '__ANON__bailout_token_handler';
134             my $explanation = $1;
135             return $self->_make_bailout_token( $line, _trim($explanation) );
136             },
137             },
138             );
139              
140             ##############################################################################
141              
142             =head3 C
143              
144             my $token = $grammar->tokenize($string);
145              
146             Passed a line of TAP, this method will return a data structure representing a
147             'token' matching that line of TAP input. Designed to be passed to
148             C to create a result object.
149              
150             This is really the only method you need to worry about for the grammar. The
151             methods below are merely for convenience, if needed.
152              
153             =cut
154              
155             sub tokenize {
156 365     365 1 698 my $self = shift;
157 365 100 66     2566 return unless @_ && defined $_[0];
158              
159 361         652 my $line = shift;
160 361         555 my $token;
161              
162 361         1567 foreach my $token_data ( values %token_for ) {
163 697 100       10028 if ( $line =~ $token_data->{syntax} ) {
164 348         741 my $handler = $token_data->{handler};
165 348         1509 $token = $self->$handler($line);
166 348         1248 last;
167             }
168             }
169 361   66     1275 $token ||= $self->_make_unknown_token($line);
170 361 50       2783 return defined $token ? TAPx::Parser::Result->new($token) : ();
171             }
172              
173             ##############################################################################
174              
175             =head2 Class methods
176              
177             =head3 C
178              
179             my @types = $grammar->token_types;
180              
181             Returns the different types of tokens which this grammar can parse.
182              
183             =cut
184              
185 1     1 1 13 sub token_types { keys %token_for }
186              
187             ##############################################################################
188              
189             =head3 C
190              
191             my $syntax = $grammar->syntax_for($token_type);
192              
193             Returns a pre-compiled regular expression which will match a chunk of TAP
194             corresponding to the token type. For example (not that you should really pay
195             attention to this, C<< $grammar->syntax_for('comment') >> will return
196             C<< qr/^#(.*)/ >>.
197              
198             =cut
199              
200             sub syntax_for {
201 4     4 1 1637 my ( $proto, $type ) = @_;
202 4         27 return $token_for{$type}{syntax};
203             }
204              
205             ##############################################################################
206              
207             =head3 C
208              
209             my $handler = $grammar->handler_for($token_type);
210              
211             Returns a code reference which, when passed an appropriate line of TAP,
212             returns the lexed token corresponding to that line. As a result, the basic
213             TAP parsing loop looks similar to the following:
214              
215             my @tokens;
216             my $grammar = TAPx::Grammar->new;
217             LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
218             foreach my $type ( $grammar->token_types ) {
219             my $syntax = $grammar->syntax_for($type);
220             if ( $line =~ $syntax ) {
221             my $handler = $grammar->handler_for($type);
222             push @tokens => $grammar->$handler($line);
223             next LINE;
224             }
225             }
226             push @tokens => $grammar->_make_unknown_token($line);
227             }
228              
229             =cut
230              
231             sub handler_for {
232 4     4 1 6 my ( $proto, $type ) = @_;
233 4         26 return $token_for{$type}{handler};
234             }
235              
236             sub _make_plan_token {
237 69     69   265 my ( $self, $line, $tests_planned, $skip, $explanation ) = @_;
238 69 100       222 if ( 0 == $tests_planned ) {
239 4   50     38 $skip ||= 'SKIP';
240             }
241 69 50 66     431 if ( $skip && 0 != $tests_planned ) {
242 0         0 warn
243             "Specified SKIP directive in plan but more than 0 tests ($line)\n";
244             }
245             return {
246 69         1448 type => 'plan',
247             raw => $line,
248             tests_planned => $tests_planned,
249             directive => $skip,
250             explanation => $explanation,
251             };
252             }
253              
254             sub _make_test_token {
255 243     243   8164 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
256 243         818 my %test = (
257             ok => $ok,
258             test_num => $num,
259             description => _trim($desc),
260             directive => uc($dir),
261             explanation => _trim($explanation),
262             raw => $line,
263             type => 'test',
264             );
265 243         1186 return \%test;
266             }
267              
268             sub _make_unknown_token {
269 13     13   48 my ( $self, $line ) = @_;
270             return {
271 13         113 raw => $line,
272             type => 'unknown',
273             };
274             }
275              
276             sub _make_comment_token {
277 34     34   141 my ( $self, $line, $comment ) = @_;
278             return {
279 34         137 type => 'comment',
280             raw => $line,
281             comment => _trim($1)
282             };
283             }
284              
285             sub _make_bailout_token {
286 5     5   28 my ( $self, $line, $explanation ) = @_;
287             return {
288 5         15 type => 'bailout',
289             raw => $line,
290             bailout => _trim($1)
291             };
292             }
293              
294             sub _trim {
295 599   100 599   2572 my $data = shift || '';
296 599         1386 $data =~ s/^\s+//;
297 599         1368 $data =~ s/\s+$//;
298 599         4581 return $data;
299             }
300              
301             =head1 TAP GRAMMAR
302              
303             B This grammar is slightly out of date. There's still some discussion
304             about it and a new one will be provided when we have things better defined.
305              
306             The C does not use a formal grammar because TAP is essentially a
307             stream-based protocol. In fact, it's quite legal to have an infinite stream.
308             For the same reason that we don't apply regexes to streams, we're not using a
309             formal grammar here. Instead, we parse the TAP in lines.
310              
311             For purposes for forward compatability, any result which does not match the
312             following grammar is currently referred to as
313             L. It is I a parse error.
314              
315             A formal grammar would look similar to the following:
316              
317             (*
318             For the time being, I'm cheating on the EBNF by allowing
319             certain terms to be defined by POSIX character classes by
320             using the following syntax:
321            
322             digit ::= [:digit:]
323            
324             As far as I am aware, that's not valid EBNF. Sue me. I
325             didn't know how to write "char" otherwise (Unicode issues).
326             Suggestions welcome.
327             *)
328            
329             (* POSIX character classes and other terminals *)
330            
331             digit ::= [:digit:]
332             character ::= ([:print:] - "\n")
333             positiveInteger ::= ( digit - '0' ) {digit}
334             nonNegativeInteger ::= digit {digit}
335            
336             tap ::= { comment | unknown } leading_plan lines
337             |
338             lines trailing_plan {comment}
339            
340             leading_plan ::= plan skip_directive? "\n"
341              
342             trailing_plan ::= plan "\n"
343              
344             plan ::= '1..' nonNegativeInteger
345            
346             lines ::= line {line}
347              
348             line ::= (comment | test | unknown | bailout ) "\n"
349            
350             test ::= status positiveInteger? description? directive?
351            
352             status ::= 'not '? 'ok '
353            
354             description ::= (character - (digit | '#')) {character - '#'}
355            
356             directive ::= todo_directive | skip_directive
357              
358             todo_directive ::= hash_mark 'TODO' ' ' {character}
359              
360             skip_directive ::= hash_mark 'SKIP' ' ' {character}
361              
362             comment ::= hash_mark {character}
363              
364             hash_mark ::= '#' {' '}
365              
366             bailout ::= 'Bail out!' {character}
367              
368             unknown ::= { (character - "\n") }
369              
370             =cut
371              
372             1;