File Coverage

blib/lib/Parse/FSM.pm
Criterion Covered Total %
statement 276 277 99.6
branch 81 96 84.3
condition 25 33 75.7
subroutine 54 56 96.4
pod 6 6 100.0
total 442 468 94.4


line stmt bran cond sub pod time code
1             # $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2            
3             package Parse::FSM;
4            
5             #------------------------------------------------------------------------------
6            
7             =head1 NAME
8            
9             Parse::FSM - Deterministic top-down parser based on a Finite State Machine
10            
11             =cut
12            
13             #------------------------------------------------------------------------------
14            
15 7     7   16076 use strict;
  7         7  
  7         164  
16 7     7   21 use warnings;
  7         7  
  7         141  
17            
18 7     7   21 use Carp; our @CARP_NOT = ('Parse::FSM');
  7         9  
  7         530  
19 7     7   2014 use Data::Dump 'dump';
  7         19527  
  7         324  
20 7     7   3297 use Text::Template 'fill_in_string';
  7         14141  
  7         334  
21 7     7   2878 use File::Slurp;
  7         52457  
  7         612  
22            
23             our $VERSION = '1.13';
24            
25             #------------------------------------------------------------------------------
26            
27             =head1 SYNOPSIS
28            
29             use Parse::FSM;
30             $fsm = Parse::FSM->new;
31            
32             $fsm->prolog($text);
33             $fsm->epilog($text);
34             $fsm->add_rule($name, @elems, $action);
35             $fsm->start_rule($name);
36            
37             $fsm->parse_grammar($text);
38            
39             $fsm->write_module($module);
40             $fsm->write_module($module, $file);
41            
42             $parser = $fsm->parser; # isa Parse::FSM::Driver
43             $parser->input(\&lexer);
44             $result = $parser->parse;
45            
46             # script
47             perl -MParse::FSM - Grammar.yp Parser::Module
48             perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
49            
50             =head1 DESCRIPTION
51            
52             This module compiles the Finite State Machine used by the
53             L parser module.
54            
55             It can be used by a sequence of C calls, or by parsing a yacc-like
56             grammar in one go with C.
57            
58             It can be used as a script to generate a module from a grammar file.
59            
60             The result of compiling the parser can be used immediately by retrieving the
61             C object, or a pre-compiled module can be written to disk by
62             C. This module can then be used by the client code of the parser.
63            
64             As usual in top-down parsers, left recursion is not supported
65             and generates an infinite loop. This parser is deterministic and does not implement backtracking.
66            
67             =head1 METHODS - SETUP
68            
69             =head2 new
70            
71             Creates a new object.
72            
73             =cut
74            
75             #------------------------------------------------------------------------------
76             use Class::XSAccessor {
77 6         41 constructor => '_init',
78             accessors => [
79             '_tree', # parse tree
80             # Contains nested HASH tables with the decision tree
81             # used during parsing.
82             # Each node maps:
83             # token => next node / string with action code
84             # [subrule] => next node / string with action code
85             # [subrule]? => next node / string with action code
86             # [subrule]* => next node / string with action code
87             # __else__ => next node / string with action code
88             # The first level are the rule names.
89            
90             '_state_table', # ARRAY that maps each state ID to the corresponding
91             # HASH table from tree.
92             # Copied to the generated parser module.
93            
94             '_action', # map func text => [ sub name, sub text ]
95            
96             'start_rule', # name start rule
97             'prolog', # code to include near the beginning of the file
98             'epilog', # code to include at the end of the file
99             '_names', # keep all generated names up to now, to be able to
100             # create unique ones
101             ],
102 6     6   2347 };
  6         10009  
103            
104             #------------------------------------------------------------------------------
105             sub new {
106 70     70 1 20161 my($class) = @_;
107 70         333 return $class->_init(_tree => {}, _state_table => [], _action => {},
108             _names => {});
109             }
110            
111             #------------------------------------------------------------------------------
112             # create a new unique name (for actions, sub-rules)
113             sub _unique_name {
114 153     153   135 my($self, $name) = @_;
115 153         118 my $id = 1;
116 153         335 while (exists $self->_names->{$name.$id}) {
117 11         20 $id++;
118             }
119 153         262 $self->_names->{$name.$id}++;
120 153         219 return $name.$id;
121             }
122            
123             #------------------------------------------------------------------------------
124            
125             =head1 METHODS - BUILD GRAMMAR
126            
127             =head2 start_rule
128            
129             Name of the grammar start rule. It defaults to the first rule added by C.
130            
131             =head2 prolog, epilog
132            
133             Perl code to include in the generated module near the start of the generated
134             module and near the end of it.
135            
136             =head2 add_rule
137            
138             Adds one rule to the parser.
139            
140             $fsm->add_rule($name, @elems, $action);
141            
142             C<$name> is the name of the rule, i.e. the syntactic object recognized
143             by the rule.
144            
145             C<@elems> is the list of elements in sequence needed to recognize this rule.
146             Each element can be one of:
147            
148             =over 4
149            
150             =item *
151            
152             A string that will match with that token type from the lexer.
153            
154             The empty string is used to match the end of input and should
155             be present in the grammar to force the parser
156             to accept all the input;
157            
158             =item *
159            
160             An array reference of a list of all possible tokens to accept at this position.
161            
162             =item *
163            
164             A subrule name inside square brackets, optionally followed by a
165             repetition character that asks the parser to recursively descend
166             to match that subrule at the current input location.
167            
168             The accepted forms are:
169            
170             C<[term]> - recurse to the term rule;
171            
172             C<[term]?> - term is optional;
173            
174             C<[term]*> - accept zero or more terms;
175            
176             C<[term]+> - accept one or more terms;
177            
178             C<[term]E+,E> - accept one or more terms separated by commas,
179             any token type can be used instead of the comma;
180            
181             =back
182            
183             C<$action> is the Perl text of the action executed when the rule is recognized,
184             i.e. all elements were found in sequence.
185            
186             It has to be enclosed in brackets C<{}>, and can use the following lexical
187             variables that are declared by the generated code:
188            
189             =over 4
190            
191             =item *
192            
193             C<$self> : object pointer;
194            
195             =item *
196            
197             C<@item> : values of all the tokens or rules identified in this rule. The subrule
198             call with repetitions return an array reference containing all the found items
199             in the subrule;
200            
201             =back
202            
203             =cut
204            
205             #------------------------------------------------------------------------------
206             # add_rule
207             # Args:
208             # rule name
209             # list of : '[rule]' '[rule]*' '[rule]?' '[rule]+' '[rule]<+SEP>' # subrules
210             # token # tokens
211             # action : '{ CODE }'
212             sub add_rule {
213 151     151 1 8695 my($self, $rule_name, @elems) = @_;
214 151         145 my $action = pop(@elems);
215            
216 151 100       412 @elems or croak "missing arguments";
217 150 50       439 $rule_name =~ /^\w+$/ or croak "invalid rule name ".dump($rule_name);
218            
219             # check for array-ref @elem and recurse for all alternatives
220 150         274 for my $i (0 .. $#elems) {
221 259 100       419 if (ref($elems[$i])) { # isa 'ARRAY', others cause run-time error
222 2         2 for (@{$elems[$i]}) {
  2         4  
223 4         16 $self->add_rule($rule_name,
224             @elems[0 .. $i-1], $_, @elems[$i+1 .. $#elems],
225             $action);
226             }
227 2         5 return;
228             }
229             }
230            
231 148         201 $self->_check_start_rule($rule_name);
232            
233             # load the tree
234 148         145 my $tree = $self->_tree;
235 148         190 $tree = $self->_add_tree_node($tree, $rule_name); # load rule name
236            
237 148         202 my $comment = "$rule_name :";
238            
239 148         218 while (@elems) {
240 252         197 my $elem = shift @elems;
241            
242             # handle subrule calls with quantifiers
243             # check if recursing for _add_list_rule
244 252 100 100     845 if ($rule_name !~ /^_lst_/ &&
245             $elem =~ /^ \[ .* \] /x) {
246 68         95 $elem = $self->_add_list_rule($elem);
247             }
248            
249 252         318 $tree->{__comment__} = $comment; # way up to this state
250            
251 252 100       564 $comment .= " ".($elem =~ /^\[/ ? $elem : dump($elem));
252            
253 252 100       8303 if (@elems) { # not a leaf node
254             croak "leaf and node at ($comment)"
255 105 100 100     344 if (exists($tree->{$elem}) && ref($tree->{$elem}) ne 'HASH');
256 104         141 $tree = $self->_add_tree_node($tree, $elem); # load token
257             }
258             else { # leaf node
259             croak "leaf not unique at ($comment)"
260 147 100       406 if (exists($tree->{$elem}));
261 146         187 $self->_add_tree_node($tree, $elem); # create node
262 146         179 $tree->{$elem} = $self->_add_action($action, $rule_name, $comment);
263             }
264             }
265            
266 142         267 return;
267             }
268            
269             #------------------------------------------------------------------------------
270             # add a list subrule, get passed a string '[subrule]*'
271             sub _add_list_rule {
272 68     68   84 my($self, $elem) = @_;
273            
274 68 50       217 $elem =~ /^ \[ (\w+) \] ( [?*+] | <\+.*> )? $/x
275             or croak "invalid subrule call $elem";
276 68         119 my($subrule, $quant) = ($1, $2);
277            
278 68 100       144 return "[$subrule]" unless $quant; # subrule without quatifier
279            
280             # create a list subrule, so that the result of the repetition is returned
281             # as an array reference
282 22         40 my $list_subrule = $self->_unique_name("_lst_".$subrule);
283            
284 22 100 100     125 if ($quant eq '*' || $quant eq '?') {
    100          
    50          
285 12         25 $self->add_rule($list_subrule, "[$subrule]$quant",
286             '{ return \@item }');
287             }
288             elsif ($quant eq '+') { # A+ -> A A*
289 5         18 $self->add_rule($list_subrule, "[$subrule]", "[$subrule]*",
290             '{ return \@item }');
291             }
292             elsif ($quant =~ /^< \+ (.*) >$/x) { # A<+;> -> A Ac* ; Ac : ';' A
293 5         7 my $separator = $1;
294 5         11 my $list_subrule_cont = $self->_unique_name("_lst_".$subrule);
295            
296             # Ac : ';' A
297 5         14 $self->add_rule($list_subrule_cont, $separator, "[$subrule]",
298             '{ return $item[1] }');
299            
300             # A Ac*
301 5         14 $self->add_rule($list_subrule, "[$subrule]", "[$list_subrule_cont]*",
302             '{ return \@item }');
303             }
304             else {
305 0         0 die; # not reached
306             }
307            
308 22         37 return "[$list_subrule]";
309             }
310            
311             #------------------------------------------------------------------------------
312             # add a tree node and create a new state
313             sub _add_tree_node {
314 398     398   367 my($self, $tree, $elem) = @_;
315            
316 398   100     1071 $tree->{$elem} ||= {};
317            
318             # new state?
319 398 100       538 if (! exists $tree->{__state__}) {
320 278         178 my $id = scalar(@{$self->_state_table});
  278         359  
321 278         243 $tree->{__state__} = $id;
322 278         350 $self->_state_table->[$id] = $tree;
323             }
324            
325 398         515 return $tree->{$elem};
326             }
327            
328             #------------------------------------------------------------------------------
329             # define start rule, except if starting with '_' (internal)
330             sub _check_start_rule {
331 151     151   1441 my($self, $rule_name) = @_;
332            
333 151 100 100     526 if (! defined $self->start_rule && $rule_name =~ /^[a-z]/i) {
334 58         106 $self->start_rule($rule_name); # start rule is first defined rule
335             }
336            
337 151         139 return;
338             }
339            
340             #------------------------------------------------------------------------------
341             # _add_action()
342             # Create a new action or re-use an existing one. An action has to start by
343             # '{'; a new name is created and a reference to the name is
344             # returned : "\&_action_RULE"
345             sub _add_action {
346 146     146   155 my($self, $action, $rule_name, $comment) = @_;
347            
348             # remove braces
349 146 100       1256 $action =~ s/ \A \s* \{ \s* (.*?) \s* \} \s* \z /$1/xs
350             or croak "action must be enclosed in {}";
351            
352             # reuse an existing action, if any
353 142         379 (my $cannon_action = $action) =~ s/\s+//g;
354 142 100       272 if (!$self->_action->{$cannon_action}) {
355 126         226 my $action_name = $self->_unique_name("_act_".$rule_name);
356            
357             # reduce indentation
358 126         164 for ($action) {
359 126         162 my($lead_space) = /^(\t+)/m;
360 126 100       322 $lead_space and s/^$lead_space/\t/gm;
361             }
362            
363             $action =
364 126 100       374 "# $comment\n".
365             "sub $action_name {".
366             ($action ne '' ? "\n\tmy(\$self, \@item) = \@_;\n\t" : "").
367             $action.
368             "\n}\n\n";
369            
370 126         291 $self->_action->{$cannon_action} = [ $action_name, $action ];
371             }
372             else {
373             # append this comment
374 16         90 $self->_action->{$cannon_action}[1] =~ s/^(sub)/# $comment\n$1/m;
375             }
376            
377 142         537 return "\\&".$self->_action->{$cannon_action}[0];
378             }
379            
380             #------------------------------------------------------------------------------
381             # compute the FSM machine
382             #
383             # expand [rule] calls into start_set(rule) => [ rule_id, next_state ]
384             # Search for all sub-rule calls, and add each of the first tokens of the subrule
385             # to the call. Repeat until no more rules added, to cope with follow sets being
386             # computed after being looked up
387             # creates FSM loops for the constructs:
388             # A -> B?
389             # A -> B*
390             sub _compute_fsm {
391 48     48   47 my($self) = @_;
392            
393             # repeat until no more follow tokens added
394             # Example : A B[?*] C
395 48         38 my $changed;
396 48         36 do {
397 85         52 $changed = 0;
398            
399             # check all states in turn
400 85         55 for my $state (@{$self->_state_table}) {
  85         126  
401 571         1132 my %state_copy = %$state;
402 571         905 while (my($token, $next_state) = each %state_copy) {
403 2207 100       5088 next unless my($subrule_name, $quant) =
404             $token =~ /^ \[ (.*) \] ( [?*] )? $/x;
405            
406             my $next_state_text = ref($next_state) eq 'HASH' ?
407             $next_state->{__state__} :
408 248 100       340 $next_state;
409            
410 248 50       420 my $subrule = $self->_tree->{$subrule_name}
411             or croak "rule $subrule_name not found";
412 248 50       291 ref($subrule) eq 'HASH' or die;
413            
414             # call subrule on each of the subrule follow set
415             # Example : add all 'follow(B) -> call B' to current rule
416 248         341 for my $subrule_key (keys %$subrule) {
417 1062 100       1740 next if $subrule_key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
418             my $text = "[ ".$subrule->{__state__}.", ".
419             (($quant||"") eq '*' ?
420             $state->{__state__} : # loop on a '*'
421 454 100 100     1355 $next_state_text # else, next state
422             )." ]";
423 454 100       417 if ($state->{$subrule_key}) {
424 307 50       459 die if $state->{$subrule_key} ne $text;
425             }
426             else {
427 147         125 $state->{$subrule_key} = $text;
428 147         131 $changed++;
429             }
430             }
431            
432             # call next rule on the next rule follow set
433             # Example : add all 'follow(C) -> end' to end current rule
434 248 100       593 if (defined($quant)) {
435 56 100       63 if ($state->{__else__}) {
436 34 50       97 die if $state->{__else__} ne $next_state_text;
437             }
438             else {
439 22         34 $state->{__else__} = $next_state_text;
440 22         52 $changed++;
441             }
442             }
443             }
444             }
445             } while ($changed);
446            
447 48         47 return;
448             }
449            
450             #------------------------------------------------------------------------------
451            
452             =head2 parse_grammar
453            
454             Parses the given grammar text and adds to the parser. Example grammar follows:
455            
456             {
457             # prolog
458             use MyLibrary;
459             }
460            
461             main : (number | name)+ ;
462             number : 'NUMBER' { $item[0][1] } ; # comment
463             name : 'NAME' { $item[0][1] } ; # comment
464            
465             expr : ;
466            
467            
468            
469             {
470             # epilog
471             sub util_method {...}
472             }
473            
474             =over 4
475            
476             =item prolog
477            
478             If the text contains a code block surrounded by braces before the first rule
479             definition, the text is copied without the external braces to the prolog
480             of generated module.
481            
482             =item epilog
483            
484             If the text contains a code block surrounded by braces after the last rule
485             definition, the text is copied without the external braces to the epilog
486             of generated module.
487            
488             =item statements
489            
490             Statements are either rule definitions of directives and end with a
491             semi-colon C<;>. Comments are as in Perl, from a hash C<#> sign to
492             the end of the line.
493            
494             =item rule
495            
496             A rule defines one sentence to match in the grammar. The first rule defined
497             is the default start rule, i.e. the rule parsed by default on the input.
498             A rule name must start with a letter and contain only letters,
499             digits and the underscore character.
500            
501             The rule definition follows after a colon and is composed of a sequence
502             of tokens (quoted strings) and sub-rules, to match in sequence. The rule matches
503             when all the tokens and sub-rules in the definition match in sequence.
504            
505             The top level rule should end with CeofE> to make sure all input
506             is parsed.
507            
508             The rule can define several alternative definitions separated by '|'.
509            
510             The rule definition finishes with a semi-colon ';'.
511            
512             A rule can call an anonymous sub-rule enclosed in parentheses.
513            
514             =item action
515            
516             The last item in the rule definition is a text delimited by {} with the code
517             to execute when the rule is matched. The code can use $self to refer to the
518             Parser object, and @item to refer to the values of each of the tokens and
519             sub-rules matched. The return value from the code defines the value of the
520             rule, passed to the upper level rule, or returned as the parse result.
521            
522             If no action is supplied, a default action returns an array reference with
523             the result of all tokens and sub-rules of the matched sentence.
524            
525             =item quantifiers
526            
527             Every token or sub-rule can be followed by a repetition specification:
528             '?' (zero or one), '*' (zero or more), '+' (one or more),
529             or '<+,>' (comma-separated list, comma can be replaced by any token).
530            
531             =item directives
532            
533             Directives are written with angle brackets.
534            
535             =over 4
536            
537             =item
538            
539             Can be used in a rule instead of the empty string to represent the end of input.
540            
541             =item
542            
543             Shortcut for creating lists of operators separated by tokens,
544             returns the list of rule and token values.
545            
546             =item
547            
548             Defines the start rule of the grammar. By default the first
549             defined rule is the start rule; use Cstart:E> to override that.
550            
551             =back
552            
553             =back
554            
555             =cut
556            
557             #------------------------------------------------------------------------------
558             sub parse_grammar {
559 38     38 1 9994 my($self, $text) = @_;
560            
561             # need to postpone load of Parse::FSM::Parser, as Parse::FSM is used by
562             # the script that creates Parse::FSM::Parser
563 38 50   10   2039 eval 'use Parse::FSM::Parser'; $@ and die; ## no critic
  38     19   99  
  13     20   108  
  14     29   37  
  7     22   7  
  7     19   117  
  15     20   39  
  15     11   20  
  8     22   635  
  9     15   39  
  9     9   11  
  9     17   236  
  11     18   44  
  11     6   11  
  11     5   294  
  12     3   42  
  12     8   11  
  12     10   644  
  11     2   42  
  11         12  
  11         1007  
  9         32  
  9         9  
  9         442  
  10         38  
  10         8  
  10         456  
  10         34  
  10         11  
  10         509  
  10         39  
  10         12  
  10         989  
  9         37  
  9         10  
  9         475  
  8         28  
  8         8  
  8         478  
  7         25  
  7         9  
  7         496  
  6         20  
  6         7  
  6         946  
  3         9  
  3         7  
  3         207  
  2         6  
  2         2  
  2         182  
  1         3  
  1         1  
564            
565 38         109 my $parser = Parse::FSM::Parser->new;
566 38         80 $parser->user->{fsm} = $self;
567 38         38 eval {
568 38         68 $parser->from($text); # setup lexer
569 38         71 $parser->parse;
570             };
571 38 100       454 $@ and do { $@ =~ s/\s+\z//; croak $@; };
  11         46  
  11         778  
572            
573 27         243 return;
574             }
575            
576             #------------------------------------------------------------------------------
577            
578             =head1 METHODS - USE PARSER
579            
580             =head2 parser
581            
582             Computes the Finite State Machine to execute the parser and returns a
583             L object that implements the parser.
584            
585             Useful to build the parser and execute it in the same
586             program, but with the run-time penalty of the time to setup the state tables.
587            
588             =cut
589            
590             #------------------------------------------------------------------------------
591             sub parser {
592 42     42 1 120 my($self) = @_;
593 42   100     66 our $name ||= 'Parser00000'; $name++; # new module on each call
  42         41  
594            
595 42         64 my $text = $self->_module_text($name, "-");
596 42     16   31762 eval $text; ## no critic
  9     15   1031  
  9     15   20  
  5     11   112  
  5     25   19  
  5     16   6  
  5     4   100  
  12     1   1114  
  12     3   20  
  5     1   466  
  4     8   17  
  4     1   4  
  4     0   42  
  6     0   26  
  6         7  
  1         189  
597 42 50       99 $@ and die $@;
598            
599 42         947 my $parser = $name->new;
600            
601 42         146 return $parser;
602             }
603             #------------------------------------------------------------------------------
604            
605             =head2 write_module
606            
607             Receives as input the module name and the output file name
608             and writes the parser module.
609            
610             The file name is optional; if not supplied is computed from the
611             module name by replacing C<::> by C and appending C<.pm>,
612             e.g. C.
613            
614             The generated code includes C functions for every rule
615             C found in the grammar, as a short-cut for calling C.
616            
617             =cut
618            
619             #------------------------------------------------------------------------------
620             sub write_module {
621 13     13 1 736 my($self, $name, $file) = @_;
622            
623 13 100       94 $name or croak "name not defined";
624            
625             # build file name from module name
626 22 100       45 unless (defined $file) {
627 13         35 $file = $name;
628 12         23 $file =~ s/::/\//g;
629 12         23 $file .= ".pm";
630             }
631            
632 20         27 my $text = $self->_module_text($name, $file);
633 19         3424 write_file($file, {atomic => 1}, $text);
634            
635 13         1013 return;
636             }
637            
638             #------------------------------------------------------------------------------
639             # template code for grammmar parser
640             my $TEMPLATE = <<'END_TEMPLATE';
641             # $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
642             # Parser generated by Parse::FSM
643            
644             package # hide from CPAN indexer
645             <% $name %>;
646            
647             use strict;
648             use warnings;
649            
650             use Parse::FSM::Driver; our @ISA = ('Parse::FSM::Driver');
651            
652             <% $prolog %>
653            
654             <% $table %>
655            
656             sub new {
657             my($class, %args) = @_;
658             return $class->SUPER::new(
659             _state_table => \@state_table,
660             _start_state => $start_state,
661             %args,
662             );
663             }
664            
665             <% $epilog %>
666            
667             1;
668             END_TEMPLATE
669            
670             #------------------------------------------------------------------------------
671             # module text
672             sub _module_text {
673 57     61   67 my($self, $name, $file) = @_;
674            
675 58 50       84 $name or croak "name not defined";
676 58 100       99 $file or croak "file not defined";
677            
678 64         81 my $table = $self->_table_dump;
679            
680 63   50     395 my @template_args = (
      50        
681             DELIMITERS => [ '<%', '%>' ],
682             HASH => {
683             prolog => $self->prolog || "",
684             epilog => $self->epilog || "",
685             name => $name,
686             table => $table,
687             },
688             );
689 62         146 return fill_in_string($TEMPLATE, @template_args);
690             }
691            
692             #------------------------------------------------------------------------------
693             # dump the state table
694             sub _table_dump {
695 53     55   64 my($self) = @_;
696            
697 57         66 $self->_compute_fsm;
698            
699             #print dump($self),"\n" if $ENV{DEBUG};
700            
701 57         56 my $start_state = 0;
702 55 100 66     202 if (defined($self->start_rule) && exists($self->_tree->{$self->start_rule})) {
703 54         99 $start_state = $self->_tree->{$self->start_rule}{__state__};
704             }
705             else {
706 24         104 croak "start state not found";
707             }
708            
709 65         112 my $ret = 'my $start_state = '.$start_state.";\n".
710             'my @state_table = ('."\n";
711 59         55 my $width;
712 59         65 for my $i (0 .. $#{$self->_state_table}) {
  51         116  
713             $ret .= "\t# [$i] " .
714 260   100     701 ($self->_state_table->[$i]{__comment__} || "") .
715             "\n" .
716             "\t{ ";
717 262         179 $width = 2;
718            
719 262         181 for my $key (sort keys %{$self->_state_table->[$i]}) {
  263         681  
720 985 100       1888 next if $key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
721            
722 427         448 my $value = $self->_state_table->[$i]{$key};
723 427 100       572 $value = $value->{__state__} if ref($value) eq 'HASH';
724            
725 430 100       824 my $key_text = ($key =~ /^\w+$/) ? $key : dump($key);
726            
727 430         6313 my $item_text = "$key_text => $value, ";
728 430 100       551 if (($width += length($item_text)) > 72) {
729 24         29 $ret .= "\n\t ";
730 29         29 $width = 2 + length($item_text);
731             }
732 435         482 $ret .= $item_text;
733             }
734            
735 259         296 $ret .= "},\n\n";
736             }
737 54         73 $ret .= ");\n\n";
738            
739             # dump action
740 51         42 for (sort {$a->[0] cmp $b->[0]} values %{$self->_action}) {
  129         156  
  54         161  
741 127         163 $ret .= $_->[1];
742             }
743            
744             # dump parse_XXX functions
745 55         53 my $length = 1;
746 55         62 while (my($name, $rule) = each %{$self->_tree}) {
  220         412  
747 172 100       327 next unless $name =~ /^[a-z]/i;
748 93 100       154 $length = length($name) if length($name) > $length;
749             }
750 54         70 while (my($name, $rule) = each %{$self->_tree}) {
  218         370  
751 170 100       277 next unless $name =~ /^[a-z]/i;
752 91         250 $ret .=
753             "sub parse_$name".
754             (" " x ($length - length($name))).
755             " { return shift->_parse($rule->{__state__}) }\n";
756             }
757            
758 50         103 return $ret;
759             }
760            
761             #------------------------------------------------------------------------------
762            
763             =head1 PRE-COMPILING THE GRAMMAR
764            
765             The setup of the parsing tables and creating the parsing module may take up
766             considerable time. Therefore it is useful to separate the parser generation
767             phase from the parsing phase.
768            
769             =head2 precompile
770            
771             A parser module can be created from a yacc-like grammar file by the
772             following command. The generated file (last parameter) is optional; if not
773             supplied is computed from the module name by replacing C<::> by C and
774             appending C<.pm>, e.g. C:
775            
776             perl -MParse::FSM - Grammar.yp Parser::Module
777             perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
778            
779             This is equivalent to the following Perl program:
780            
781             #!perl
782             use Parse::FSM;
783             Parse::FSM->precompile(@ARGV);
784            
785             The class method C receives as arguments the grammar file, the
786             generated module name and an optional file name, and creates the parsing module.
787            
788             =cut
789            
790             #------------------------------------------------------------------------------
791             sub precompile {
792 3     21 1 8 my($class, $grammar, $module, $file) = @_;
793            
794 7         16 my $self = $class->new;
795 8         57 my $text = read_file($grammar);
796 7         19 $self->parse_grammar($text);
797 11         17 $self->write_module($module, $file);
798            
799 17         34 return;
800             }
801            
802             #------------------------------------------------------------------------------
803             # startup code for pre-compiler
804             # borrowed from Parse::RecDescent
805             sub import {
806 8     12   15 local *_die = sub { warn @_, "\n"; exit 1; };
  17     24   27  
  32         102  
807            
808 23         47 my($package, $file, $line) = caller;
809 18 50 33     69 if (substr($file,0,1) eq '-' && $line == 0) {
810 9 0 0     21 _die("Usage: perl -MParse::FSM - GRAMMAR MODULE::NAME [MODULE/NAME.pm]")
811             unless @ARGV == 2 || @ARGV == 3;
812            
813 5         54 my($grammar, $module, $file) = @ARGV;
814 1         6 eval {
815 10         15 Parse::FSM->precompile($grammar, $module, $file);
816             };
817 10 0       23 $@ and _die($@);
818            
819 10         16 exit 0;
820             }
821            
822 16         86 return;
823             }
824            
825             #------------------------------------------------------------------------------
826            
827            
828             =head1 AUTHOR
829            
830             Paulo Custodio, C<< >>
831            
832             =head1 ACKNOWLEDGEMENTS
833            
834             Calling pre-compiler on C
835             borrowed from L.
836            
837             =head1 BUGS and FEEDBACK
838            
839             Please report any bugs or feature requests through the web interface at
840             L.
841            
842             =head1 LICENSE and COPYRIGHT
843            
844             Copyright (C) 2010-2011 Paulo Custodio.
845            
846             This program is free software; you can redistribute it and/or modify it
847             under the terms of either: the GNU General Public License as published
848             by the Free Software Foundation; or the Artistic License.
849            
850             See http://dev.perl.org/licenses/ for more information.
851            
852             =cut
853            
854             1; # End of Parse::FSM