File Coverage

blib/lib/Parse/FSM/Driver.pm
Criterion Covered Total %
statement 74 75 98.6
branch 34 36 94.4
condition n/a
subroutine 14 14 100.0
pod 5 5 100.0
total 127 130 97.6


line stmt bran cond sub pod time code
1             # $Id: Driver.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2            
3             package Parse::FSM::Driver;
4            
5             #------------------------------------------------------------------------------
6            
7             =head1 NAME
8            
9             Parse::FSM::Driver - Run-time engine for Parse::FSM parser
10            
11             =cut
12            
13             #------------------------------------------------------------------------------
14            
15 6     6   967 use warnings;
  6         10  
  6         200  
16 6     6   26 use strict;
  6         7  
  6         200  
17            
18 6     6   24 use Carp; our @CARP_NOT = ('Parse::FSM::Driver');
  6         8  
  6         574  
19 6     6   752 use Data::Dump 'dump';
  6         6482  
  6         621  
20            
21             our $VERSION = '1.11';
22            
23             #------------------------------------------------------------------------------
24            
25             =head1 SYNOPSIS
26            
27             use MyParser; # isa Parse::FSM::Driver
28            
29             $parser = MyParser->new;
30             $parser->input( \&lexer );
31             $parser->user( $user_pointer );
32            
33             $result = $parser->parse( $start_rule );
34             $result = $parser->parse_start_rule;
35            
36             $token = $parser->peek_token;
37             $token = $parser->get_token;
38             $parser->unget_token(@tokens);
39            
40             =head1 DESCRIPTION
41            
42             This modules implements a deterministic top-down parser based on a
43             pre-computed Finite State Machine (FSM).
44            
45             The FSM is generated by L, by
46             reading a BNF-type grammar file and generating
47             a run-time module that includes the state tables. The module also include
48             the run-time parsing routine that follows the state tables to obtain
49             a parse of the input.
50            
51             This module is not intended to be used stand alone. It is used as a base class
52             by the modules generated by L.
53            
54             =head1 METHODS - SETUP
55            
56             =head2 new
57            
58             Creates a new object.
59            
60             =head2 user
61            
62             Get/set of the parser user pointer. The user pointer is not used by the parser,
63             and is available for communication between the parser actions and the
64             calling module.
65            
66             It can for example point to a data structure that describes the objects
67             already identified in the parse.
68            
69             =cut
70            
71             #------------------------------------------------------------------------------
72             # Parsing state machine
73             # Each state hash has:
74             # terminal => (state ID), for a match
75             # terminal => [ (subrule ID), (next state ID) ], for a sub-rule
76             # followed by a match
77             # terminal => [ (subrule ID), sub{} ], for a sub-rule followed by an accept
78             # terminal => sub{}, for an accept
79             # Each sub{} has $self and @args pre-declared
80             # @args is [] of all parsed elements
81             # $self is the Parse::FSM::Driver object
82            
83             #------------------------------------------------------------------------------
84             use Class::XSAccessor {
85 6         58 constructor => '_init',
86             accessors => [
87             'input', # input iterator
88             '_head', # unget queue of tokens retrived from input
89             'user', # user pointer
90             '_state_table', # list of states
91             '_start_state', # ID of start state
92             ],
93 6     6   760 };
  6         2723  
94            
95             #------------------------------------------------------------------------------
96             sub new {
97 100     100 1 2847 my($class, @args) = @_;
98             return $class->_init(
99 3     3   455 input => sub {},
100 100         1469 _head => [],
101             user => {},
102             _state_table => [],
103             _start_state => 0,
104             @args);
105             }
106             #------------------------------------------------------------------------------
107            
108             =head1 METHODS - INPUT STREAM
109            
110             =head2 input
111            
112             Get/set the parser input lexer iterator. The iterator is a code reference of
113             a function that returns the next token to be parsed as an array ref,
114             with token type and token value C<[$type, $value]>.
115             It returns C on end of input. E.g. for a simple expression lexer:
116            
117             sub make_lexer {
118             my($line) = @_;
119             return sub {
120             for ($line) {
121             /\G\s+/gc;
122             return [NUM => $1] if /\G(\d+)/gc;
123             return [NAME => $1] if /\G([a-z]\w*)/gci;
124             return [$1 => $1] if /\G(.)/gc;
125             return;
126             }
127             };
128             }
129             $parser->input(make_lexer("2+3*4"));
130            
131             =head2 peek_token
132            
133             Returns the next token to be retrieved by the lexer, but keeps it in the input
134             queue. Can be used by a rule action to decide based on the input that follows.
135            
136             =cut
137            
138             #------------------------------------------------------------------------------
139             sub peek_token {
140 2407     2407 1 2135 my($self) = @_;
141 2407 100       1775 @{$self->_head} or push @{$self->_head}, $self->input->();
  1340         2945  
  2407         5008  
142 2403         8972 return $self->_head->[0]; # may be undef, if end of input
143             }
144             #------------------------------------------------------------------------------
145            
146             =head2 get_token
147            
148             Extracts the next token from the lexer stream. Can be used by a rule action to
149             discard the following tokens.
150            
151             =cut
152            
153             #------------------------------------------------------------------------------
154             sub get_token {
155 886     886 1 31433 my($self) = @_;
156 886 100       727 @{$self->_head} and return shift @{$self->_head};
  698         1251  
  886         2352  
157 188         647 return $self->_head->[0]; # may be undef, if end of input
158             }
159             #------------------------------------------------------------------------------
160            
161             =head2 unget_token
162            
163             Pushes back the given list of tokens to the lexer input stream, to be retrieved
164             on the next calls to C.
165            
166             =cut
167            
168             #------------------------------------------------------------------------------
169             sub unget_token {
170 4     4 1 9 my($self, @tokens) = @_;
171 4         5 unshift @{$self->_head}, @tokens;
  4         16  
172 4         38 return;
173             }
174             #------------------------------------------------------------------------------
175            
176             =head1 METHODS - PARSING
177            
178             =head2 parse
179            
180             This function receives an optional start rule name, and uses the default rule
181             of the grammar if not supplied.
182            
183             It parses the input stream, leaving the stream at the first unparsed
184             token, and returns the parse value - the result of the action function for the
185             start rule.
186            
187             The function dies with an error message indicating the input that cannot
188             be parsed in case of a parse error.
189            
190             =head2 parse_XXX
191            
192             For each rule C in the grammar, L creates a correspnding
193             C to start the parse at that rule. This is a short-cut to
194             C.
195            
196             =cut
197            
198             #------------------------------------------------------------------------------
199             sub parse {
200 175     175 1 11757 my($self, $start_rule) = @_;
201            
202             # current state
203 175         226 my $state;
204 175 100       380 if (defined($start_rule)) {
205 8 100       212 $state = $self->_state_table->[0]{$start_rule}
206             or croak "Rule $start_rule not found";
207             }
208             else {
209 167 50       666 $state = $self->_start_state
210             or croak "Start state not found";
211             }
212 174         434 return $self->_parse($state);
213             }
214            
215             #------------------------------------------------------------------------------
216             sub _parse {
217 194     194   235 my($self, $state) = @_;
218            
219 194         272 my @values = ();
220            
221             # return stack of states
222 194         213 my @stack = (); # store: [$state, @values]
223            
224             # fetch token only after drop and after calling parser rules
225 194         422 my $token = $self->peek_token;
226 194         282 while (1) {
227 2572         1934 my($entry, $found_else);
228 2572 100       8424 if ($entry = $self->_state_table->[$state]{($token ? $token->[0] : "")}) {
    100          
    100          
229             # entry exists, found token
230             }
231             elsif ($entry = $self->_state_table->[$state]{__else__}) {
232 521         478 $found_else++;
233             }
234             else {
235 82         279 $self->_error_at($token, $state);
236             }
237            
238 2490 100       3358 if (ref($entry) eq 'ARRAY') { # call sub-rule
239 1411         1536 my($next_state, $return_state) = @$entry;
240 1411         1985 push(@stack, [ $return_state, @values ]); # return data
241 1411         2013 ($state, @values) = ($next_state); # call
242             }
243             else { # accept token
244 1079         858 $state = $entry;
245            
246 1079 100       1646 if (!$found_else) {
247 725 100       1289 push(@values, $token) if $token; # add token to values
248 725         1194 $self->get_token; # drop value
249 725         1046 $token = $self->peek_token; # and get next token
250             }
251            
252 1075         1964 while (ref($state) eq 'CODE') { # return from sub-rules
253 1460         8447 my $value = $self->$state(@values);
254 1460         2271 $token = $self->peek_token; # input may have changed
255            
256 1460 100       2351 if ( ! @stack ) { # END OF PARSE
257 108         651 return $value;
258             }
259            
260 1352         1257 my $top = pop(@stack);
261 1352         2412 ($state, @values) = @$top;
262            
263             # keep only defined values
264 1352 100       4388 push(@values, $value) if defined($value);
265             }
266             }
267             }
268 0         0 die 'not reached';
269             }
270            
271             #------------------------------------------------------------------------------
272             # expected error at given stream position, die with error message
273             sub _error_at {
274 84     84   493 my($self, $token, $state) = @_;
275            
276 95         773 my @expected = sort map {_format_token($_)}
  84         288  
277 84         98 keys %{$self->_state_table->[$state]};
278 84 100       1999 die("Expected ",
    100          
279             scalar(@expected) == 1 ? "@expected" : "one of (@expected)",
280             " at ",
281             defined($token) ? _format_token($token->[0]) : "EOF",
282             "\n");
283             }
284            
285             #------------------------------------------------------------------------------
286             # format a token
287             sub _format_token {
288 131     131   183 my($token) = @_;
289 131 50       258 return "" if !defined($token);
290 131 100       321 return "EOF" if $token eq "";
291 113 100       434 return dump($token) if $token =~ /\W/;
292 89         409 return $token;
293             }
294             #------------------------------------------------------------------------------
295            
296             =head1 AUTHOR, BUGS, FEEDBACK, LICENSE, COPYRIGHT
297            
298             See L
299            
300             =cut
301            
302             #------------------------------------------------------------------------------
303            
304             1;