File Coverage

blib/lib/Parse/FSM/Driver.pm
Criterion Covered Total %
statement 73 74 98.6
branch 34 36 94.4
condition n/a
subroutine 14 14 100.0
pod 5 5 100.0
total 126 129 97.6


line stmt bran cond sub pod time code
1             # $Id: Driver.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2            
3             package Parse::FSM::Driver;
4            
5             #------------------------------------------------------------------------------
6            
7             =head1 NAME
8            
9             Parse::FSM::Driver - Run-time engine for Parse::FSM parser
10            
11             =cut
12            
13             #------------------------------------------------------------------------------
14            
15 6     6   851 use warnings;
  6         11  
  6         155  
16 6     6   26 use strict;
  6         12  
  6         107  
17            
18 6     6   24 use Carp; our @CARP_NOT = ('Parse::FSM::Driver');
  6         11  
  6         538  
19 6     6   782 use Data::Dump 'dump';
  6         5228  
  6         489  
20            
21             our $VERSION = '1.12';
22            
23             #------------------------------------------------------------------------------
24            
25             =head1 SYNOPSIS
26            
27             use MyParser; # isa Parse::FSM::Driver
28            
29             $parser = MyParser->new;
30             $parser->input( \&lexer );
31             $parser->user( $user_pointer );
32            
33             $result = $parser->parse( $start_rule );
34             $result = $parser->parse_start_rule;
35            
36             $token = $parser->peek_token;
37             $token = $parser->get_token;
38             $parser->unget_token(@tokens);
39            
40             =head1 DESCRIPTION
41            
42             This module implements a deterministic top-down parser based on a
43             pre-computed Finite State Machine (FSM).
44            
45             The FSM is generated by L, by
46             reading a BNF-type grammar file and generating
47             a run-time module that includes the state tables. The module also include
48             the run-time parsing routine that follows the state tables to obtain
49             a parse of the input.
50            
51             This module is not intended to be used stand alone. It is used as a base class
52             by the modules generated by L.
53            
54             =head1 METHODS - SETUP
55            
56             =head2 new
57            
58             Creates a new object.
59            
60             =head2 user
61            
62             Get/set of the parser user pointer. The user pointer is not used by the parser,
63             and is available for communication between the parser actions and the
64             calling module.
65            
66             It can for example point to a data structure that describes the objects
67             already identified in the parse.
68            
69             =cut
70            
71             #------------------------------------------------------------------------------
72             # Parsing state machine
73             # Each state hash has:
74             # terminal => (state ID), for a match
75             # terminal => [ (subrule ID), (next state ID) ], for a sub-rule
76             # followed by a match
77             # terminal => [ (subrule ID), sub{} ], for a sub-rule followed by an accept
78             # terminal => sub{}, for an accept
79             # Each sub{} has $self and @args pre-declared
80             # @args is [] of all parsed elements
81             # $self is the Parse::FSM::Driver object
82            
83             #------------------------------------------------------------------------------
84             use Class::XSAccessor {
85 6         105 constructor => '_init',
86             accessors => [
87             'input', # input iterator
88             '_head', # unget queue of tokens retrived from input
89             'user', # user pointer
90             '_state_table', # list of states
91             '_start_state', # ID of start state
92             ],
93 6     6   738 };
  6         2466  
94            
95             #------------------------------------------------------------------------------
96             sub new {
97 100     100 1 2351 my($class, @args) = @_;
98             return $class->_init(
99       3     input => sub {},
100 100         1142 _head => [],
101             user => {},
102             _state_table => [],
103             _start_state => 0,
104             @args);
105             }
106             #------------------------------------------------------------------------------
107            
108             =head1 METHODS - INPUT STREAM
109            
110             =head2 input
111            
112             Get/set the parser input lexer iterator. The iterator is a code reference of
113             a function that returns the next token to be parsed as an array ref,
114             with token type and token value C<[$type, $value]>.
115             It returns C on end of input. E.g. for a simple expression lexer:
116            
117             sub make_lexer {
118             my($line) = @_;
119             return sub {
120             for ($line) {
121             /\G\s+/gc;
122             return [NUM => $1] if /\G(\d+)/gc;
123             return [NAME => $1] if /\G([a-z]\w*)/gci;
124             return [$1 => $1] if /\G(.)/gc;
125             return;
126             }
127             };
128             }
129             $parser->input(make_lexer("2+3*4"));
130            
131             =head2 peek_token
132            
133             Returns the next token to be retrieved by the lexer, but keeps it in the input
134             queue. Can be used by a rule action to decide based on the input that follows.
135            
136             =cut
137            
138             #------------------------------------------------------------------------------
139             sub peek_token {
140 2407     2407 1 2806 my($self) = @_;
141 2407 100       2346 @{$self->_head} or push @{$self->_head}, $self->input->();
  1340         3804  
  2407         6055  
142 2403         10481 return $self->_head->[0]; # may be undef, if end of input
143             }
144             #------------------------------------------------------------------------------
145            
146             =head2 get_token
147            
148             Extracts the next token from the lexer stream. Can be used by a rule action to
149             discard the following tokens.
150            
151             =cut
152            
153             #------------------------------------------------------------------------------
154             sub get_token {
155 886     886 1 28971 my($self) = @_;
156 886 100       829 @{$self->_head} and return shift @{$self->_head};
  698         1499  
  886         2531  
157 188         646 return $self->_head->[0]; # may be undef, if end of input
158             }
159             #------------------------------------------------------------------------------
160            
161             =head2 unget_token
162            
163             Pushes back the given list of tokens to the lexer input stream, to be retrieved
164             on the next calls to C.
165            
166             =cut
167            
168             #------------------------------------------------------------------------------
169             sub unget_token {
170 4     4 1 8 my($self, @tokens) = @_;
171 4         5 unshift @{$self->_head}, @tokens;
  4         16  
172 4         43 return;
173             }
174             #------------------------------------------------------------------------------
175            
176             =head1 METHODS - PARSING
177            
178             =head2 parse
179            
180             This function receives an optional start rule name, and uses the default rule
181             of the grammar if not supplied.
182            
183             It parses the input stream, leaving the stream at the first unparsed
184             token, and returns the parse value - the result of the action function for the
185             start rule.
186            
187             The function dies with an error message indicating the input that cannot
188             be parsed in case of a parse error.
189            
190             =head2 parse_XXX
191            
192             For each rule C in the grammar, L creates a
193             corresponding C to start the parse at that rule. This is a
194             short-cut to C.
195            
196             =cut
197            
198             #------------------------------------------------------------------------------
199             sub parse {
200 175     175 1 13064 my($self, $start_rule) = @_;
201            
202             # current state
203 175         209 my $state;
204 175 100       344 if (defined($start_rule)) {
205 8 100       199 $state = $self->_state_table->[0]{$start_rule}
206             or croak "Rule $start_rule not found";
207             }
208             else {
209 167 50       578 $state = $self->_start_state
210             or croak "Start state not found";
211             }
212 174         397 return $self->_parse($state);
213             }
214            
215             #------------------------------------------------------------------------------
216             sub _parse {
217 194     194   256 my($self, $state) = @_;
218            
219 194         282 my @values = ();
220            
221             # return stack of states
222 194         218 my @stack = (); # store: [$state, @values]
223            
224             # fetch token only after drop and after calling parser rules
225 194         457 my $token = $self->peek_token;
226 194         229 while (1) {
227 2572         2611 my($entry, $found_else);
228 2572 100       8970 if ($entry = $self->_state_table->[$state]{($token ? $token->[0] : "")}) {
    100          
    100          
229             # entry exists, found token
230             }
231             elsif ($entry = $self->_state_table->[$state]{__else__}) {
232 521         677 $found_else++;
233             }
234             else {
235 82         250 $self->_error_at($token, $state);
236             }
237            
238 2490 100       4507 if (ref($entry) eq 'ARRAY') { # call sub-rule
239 1411         1756 my($next_state, $return_state) = @$entry;
240 1411         2386 push(@stack, [ $return_state, @values ]); # return data
241 1411         2577 ($state, @values) = ($next_state); # call
242             }
243             else { # accept token
244 1079         1151 $state = $entry;
245            
246 1079 100       1816 if (!$found_else) {
247 725 100       1420 push(@values, $token) if $token; # add token to values
248 725         1209 $self->get_token; # drop value
249 725         1300 $token = $self->peek_token; # and get next token
250             }
251            
252 1075         2451 while (ref($state) eq 'CODE') { # return from sub-rules
253 1460         8406 my $value = $self->$state(@values);
254 1460         2775 $token = $self->peek_token; # input may have changed
255            
256 1460 100       2665 if ( ! @stack ) { # END OF PARSE
257 108         571 return $value;
258             }
259            
260 1352         1557 my $top = pop(@stack);
261 1352         2769 ($state, @values) = @$top;
262            
263             # keep only defined values
264 1352 100       5234 push(@values, $value) if defined($value);
265             }
266             }
267             }
268 0         0 die 'not reached';
269             }
270            
271             #------------------------------------------------------------------------------
272             # expected error at given stream position, die with error message
273             sub _error_at {
274 84     84   596 my($self, $token, $state) = @_;
275            
276 95         737 my @expected = sort map {_format_token($_)}
277 84         109 keys %{$self->_state_table->[$state]};
  84         300  
278 84 100       1545 die("Expected ",
    100          
279             scalar(@expected) == 1 ? "@expected" : "one of (@expected)",
280             " at ",
281             defined($token) ? _format_token($token->[0]) : "EOF",
282             "\n");
283             }
284            
285             #------------------------------------------------------------------------------
286             # format a token
287             sub _format_token {
288 131     131   179 my($token) = @_;
289 131 50       250 return "" if !defined($token);
290 131 100       299 return "EOF" if $token eq "";
291 113 100       379 return dump($token) if $token =~ /\W/;
292 89         463 return $token;
293             }
294             #------------------------------------------------------------------------------
295            
296             =head1 AUTHOR, BUGS, FEEDBACK, LICENSE, COPYRIGHT
297            
298             See L
299            
300             =cut
301            
302             #------------------------------------------------------------------------------
303            
304             1;