File Coverage

blib/lib/Parse/FSM.pm
Criterion Covered Total %
statement 276 277 99.6
branch 80 96 83.3
condition 25 33 75.7
subroutine 55 56 98.2
pod 6 6 100.0
total 442 468 94.4


line stmt bran cond sub pod time code
1             # $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2            
3             package Parse::FSM;
4            
5             #------------------------------------------------------------------------------
6            
7             =head1 NAME
8            
9             Parse::FSM - Deterministic top-down parser based on a Finite State Machine
10            
11             =cut
12            
13             #------------------------------------------------------------------------------
14            
15 7     7   24387 use strict;
  7         14  
  7         178  
16 7     7   34 use warnings;
  7         9  
  7         216  
17            
18 7     7   27 use Carp; our @CARP_NOT = ('Parse::FSM');
  7         15  
  7         768  
19 7     7   3583 use Data::Dump 'dump';
  7         28937  
  7         439  
20 7     7   6071 use Text::Template 'fill_in_string';
  7         20903  
  7         410  
21 7     7   5151 use File::Slurp;
  7         95488  
  7         862  
22            
23             our $VERSION = '1.12';
24            
25             #------------------------------------------------------------------------------
26            
27             =head1 SYNOPSIS
28            
29             use Parse::FSM;
30             $fsm = Parse::FSM->new;
31            
32             $fsm->prolog($text);
33             $fsm->epilog($text);
34             $fsm->add_rule($name, @elems, $action);
35             $fsm->start_rule($name);
36            
37             $fsm->parse_grammar($text);
38            
39             $fsm->write_module($module);
40             $fsm->write_module($module, $file);
41            
42             $parser = $fsm->parser; # isa Parse::FSM::Driver
43             $parser->input(\&lexer);
44             $result = $parser->parse;
45            
46             # script
47             perl -MParse::FSM - Grammar.yp Parser::Module
48             perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
49            
50             =head1 DESCRIPTION
51            
52             This module compiles the Finite State Machine used by the
53             L parser module.
54            
55             It can be used by a sequence of C calls, or by parsing a yacc-like
56             grammar in one go with C.
57            
58             It can be used as a script to generate a module from a grammar file.
59            
60             The result of compiling the parser can be used immediately by retrieving the
61             C object, or a pre-compiled module can be written to disk by
62             C. This module can then be used by the client code of the parser.
63            
64             As usual in top-down parsers, left recursion is not supported
65             and generates an infinite loop. This parser is deterministic and does not implement backtracking.
66            
67             =head1 METHODS - SETUP
68            
69             =head2 new
70            
71             Creates a new object.
72            
73             =cut
74            
75             #------------------------------------------------------------------------------
76             use Class::XSAccessor {
77 6         55 constructor => '_init',
78             accessors => [
79             '_tree', # parse tree
80             # Contains nested HASH tables with the decision tree
81             # used during parsing.
82             # Each node maps:
83             # token => next node / string with action code
84             # [subrule] => next node / string with action code
85             # [subrule]? => next node / string with action code
86             # [subrule]* => next node / string with action code
87             # __else__ => next node / string with action code
88             # The first level are the rule names.
89            
90             '_state_table', # ARRAY that maps each state ID to the corresponding
91             # HASH table from tree.
92             # Copied to the generated parser module.
93            
94             '_action', # map func text => [ sub name, sub text ]
95            
96             'start_rule', # name start rule
97             'prolog', # code to include near the beginning of the file
98             'epilog', # code to include at the end of the file
99             '_names', # keep all generated names up to now, to be able to
100             # create unique ones
101             ],
102 6     6   4386 };
  6         15383  
103            
104             #------------------------------------------------------------------------------
105             sub new {
106 70     70 1 40020 my($class) = @_;
107 70         497 return $class->_init(_tree => {}, _state_table => [], _action => {},
108             _names => {});
109             }
110            
111             #------------------------------------------------------------------------------
112             # create a new unique name (for actions, sub-rules)
113             sub _unique_name {
114 153     153   216 my($self, $name) = @_;
115 153         188 my $id = 1;
116 153         509 while (exists $self->_names->{$name.$id}) {
117 11         33 $id++;
118             }
119 153         389 $self->_names->{$name.$id}++;
120 153         357 return $name.$id;
121             }
122            
123             #------------------------------------------------------------------------------
124            
125             =head1 METHODS - BUILD GRAMMAR
126            
127             =head2 start_rule
128            
129             Name of the grammar start rule. It defaults to the first rule added by C.
130            
131             =head2 prolog, epilog
132            
133             Perl code to include in the generated module near the start of the generated
134             module and near the end of it.
135            
136             =head2 add_rule
137            
138             Adds one rule to the parser.
139            
140             $fsm->add_rule($name, @elems, $action);
141            
142             C<$name> is the name of the rule, i.e. the syntactic object recognized
143             by the rule.
144            
145             C<@elems> is the list of elements in sequence needed to recognize this rule.
146             Each element can be one of:
147            
148             =over 4
149            
150             =item *
151            
152             A string that will match with that token type from the lexer.
153            
154             The empty string is used to match the end of input and should
155             be present in the grammar to force the parser
156             to accept all the input;
157            
158             =item *
159            
160             An array reference of a list of all possible tokens to accept at this position.
161            
162             =item *
163            
164             A subrule name inside square brackets, optionally followed by a
165             repetition character that asks the parser to recursively descend
166             to match that subrule at the current input location.
167            
168             The accepted forms are:
169            
170             C<[term]> - recurse to the term rule;
171            
172             C<[term]?> - term is optional;
173            
174             C<[term]*> - accept zero or more terms;
175            
176             C<[term]+> - accept one or more terms;
177            
178             C<[term]E+,E> - accept one or more terms separated by commas,
179             any token type can be used instead of the comma;
180            
181             =back
182            
183             C<$action> is the Perl text of the action executed when the rule is recognized,
184             i.e. all elements were found in sequence.
185            
186             It has to be enclosed in brackets C<{}>, and can use the following lexical
187             variables that are declared by the generated code:
188            
189             =over 4
190            
191             =item *
192            
193             C<$self> : object pointer;
194            
195             =item *
196            
197             C<@item> : values of all the tokens or rules identified in this rule. The subrule
198             call with repetitions return an array reference containing all the found items
199             in the subrule;
200            
201             =back
202            
203             =cut
204            
205             #------------------------------------------------------------------------------
206             # add_rule
207             # Args:
208             # rule name
209             # list of : '[rule]' '[rule]*' '[rule]?' '[rule]+' '[rule]<+SEP>' # subrules
210             # token # tokens
211             # action : '{ CODE }'
212             sub add_rule {
213 151     151 1 16838 my($self, $rule_name, @elems) = @_;
214 151         447 my $action = pop(@elems);
215            
216 151 100       482 @elems or croak "missing arguments";
217 150 50       526 $rule_name =~ /^\w+$/ or croak "invalid rule name ".dump($rule_name);
218            
219             # check for array-ref @elem and recurse for all alternatives
220 150         328 for my $i (0 .. $#elems) {
221 259 100       577 if (ref($elems[$i])) { # isa 'ARRAY', others cause run-time error
222 2         5 for (@{$elems[$i]}) {
  2         5  
223 4         26 $self->add_rule($rule_name,
224             @elems[0 .. $i-1], $_, @elems[$i+1 .. $#elems],
225             $action);
226             }
227 2         7 return;
228             }
229             }
230            
231 148         793 $self->_check_start_rule($rule_name);
232            
233             # load the tree
234 148         237 my $tree = $self->_tree;
235 148         291 $tree = $self->_add_tree_node($tree, $rule_name); # load rule name
236            
237 148         738 my $comment = "$rule_name :";
238            
239 148         514 while (@elems) {
240 252         416 my $elem = shift @elems;
241            
242             # handle subrule calls with quantifiers
243             # check if recursing for _add_list_rule
244 252 100 100     2056 if ($rule_name !~ /^_lst_/ &&
245             $elem =~ /^ \[ .* \] /x) {
246 68         157 $elem = $self->_add_list_rule($elem);
247             }
248            
249 252         718 $tree->{__comment__} = $comment; # way up to this state
250            
251 252 100       823 $comment .= " ".($elem =~ /^\[/ ? $elem : dump($elem));
252            
253 252 100       16641 if (@elems) { # not a leaf node
254             croak "leaf and node at ($comment)"
255 105 100 100     434 if (exists($tree->{$elem}) && ref($tree->{$elem}) ne 'HASH');
256 104         428 $tree = $self->_add_tree_node($tree, $elem); # load token
257             }
258             else { # leaf node
259             croak "leaf not unique at ($comment)"
260 147 100       406 if (exists($tree->{$elem}));
261 146         289 $self->_add_tree_node($tree, $elem); # create node
262 146         506 $tree->{$elem} = $self->_add_action($action, $rule_name, $comment);
263             }
264             }
265            
266 142         413 return;
267             }
268            
269             #------------------------------------------------------------------------------
270             # add a list subrule, get passed a string '[subrule]*'
271             sub _add_list_rule {
272 68     68   138 my($self, $elem) = @_;
273            
274 68 50       292 $elem =~ /^ \[ (\w+) \] ( [?*+] | <\+.*> )? $/x
275             or croak "invalid subrule call $elem";
276 68         395 my($subrule, $quant) = ($1, $2);
277            
278 68 100       423 return "[$subrule]" unless $quant; # subrule without quatifier
279            
280             # create a list subrule, so that the result of the repetition is returned
281             # as an array reference
282 22         153 my $list_subrule = $self->_unique_name("_lst_".$subrule);
283            
284 22 100 100     574 if ($quant eq '*' || $quant eq '?') {
    100          
    50          
285 12         254 $self->add_rule($list_subrule, "[$subrule]$quant",
286             '{ return \@item }');
287             }
288             elsif ($quant eq '+') { # A+ -> A A*
289 5         27 $self->add_rule($list_subrule, "[$subrule]", "[$subrule]*",
290             '{ return \@item }');
291             }
292             elsif ($quant =~ /^< \+ (.*) >$/x) { # A<+;> -> A Ac* ; Ac : ';' A
293 5         12 my $separator = $1;
294 5         17 my $list_subrule_cont = $self->_unique_name("_lst_".$subrule);
295            
296             # Ac : ';' A
297 5         21 $self->add_rule($list_subrule_cont, $separator, "[$subrule]",
298             '{ return $item[1] }');
299            
300             # A Ac*
301 5         27 $self->add_rule($list_subrule, "[$subrule]", "[$list_subrule_cont]*",
302             '{ return \@item }');
303             }
304             else {
305 0         0 die; # not reached
306             }
307            
308 22         55 return "[$list_subrule]";
309             }
310            
311             #------------------------------------------------------------------------------
312             # add a tree node and create a new state
313             sub _add_tree_node {
314 398     398   601 my($self, $tree, $elem) = @_;
315            
316 398   100     1527 $tree->{$elem} ||= {};
317            
318             # new state?
319 398 100       857 if (! exists $tree->{__state__}) {
320 278         483 my $id = scalar(@{$self->_state_table});
  278         803  
321 278         446 $tree->{__state__} = $id;
322 278         584 $self->_state_table->[$id] = $tree;
323             }
324            
325 398         1213 return $tree->{$elem};
326             }
327            
328             #------------------------------------------------------------------------------
329             # define start rule, except if starting with '_' (internal)
330             sub _check_start_rule {
331 151     151   1781 my($self, $rule_name) = @_;
332            
333 151 100 100     911 if (! defined $self->start_rule && $rule_name =~ /^[a-z]/i) {
334 58         148 $self->start_rule($rule_name); # start rule is first defined rule
335             }
336            
337 151         234 return;
338             }
339            
340             #------------------------------------------------------------------------------
341             # _add_action()
342             # Create a new action or re-use an existing one. An action has to start by
343             # '{'; a new name is created and a reference to the name is
344             # returned : "\&_action_RULE"
345             sub _add_action {
346 146     146   251 my($self, $action, $rule_name, $comment) = @_;
347            
348             # remove braces
349 146 100       1871 $action =~ s/ \A \s* \{ \s* (.*?) \s* \} \s* \z /$1/xs
350             or croak "action must be enclosed in {}";
351            
352             # reuse an existing action, if any
353 142         1880 (my $cannon_action = $action) =~ s/\s+//g;
354 142 100       409 if (!$self->_action->{$cannon_action}) {
355 126         359 my $action_name = $self->_unique_name("_act_".$rule_name);
356            
357             # reduce indentation
358 126         245 for ($action) {
359 126         451 my($lead_space) = /^(\t+)/m;
360 126 100       459 $lead_space and s/^$lead_space/\t/gm;
361             }
362            
363             $action =
364 126 100       561 "# $comment\n".
365             "sub $action_name {".
366             ($action ne '' ? "\n\tmy(\$self, \@item) = \@_;\n\t" : "").
367             $action.
368             "\n}\n\n";
369            
370 126         693 $self->_action->{$cannon_action} = [ $action_name, $action ];
371             }
372             else {
373             # append this comment
374 16         123 $self->_action->{$cannon_action}[1] =~ s/^(sub)/# $comment\n$1/m;
375             }
376            
377 142         804 return "\\&".$self->_action->{$cannon_action}[0];
378             }
379            
380             #------------------------------------------------------------------------------
381             # compute the FSM machine
382             #
383             # expand [rule] calls into start_set(rule) => [ rule_id, next_state ]
384             # Search for all sub-rule calls, and add each of the first tokens of the subrule
385             # to the call. Repeat until no more rules added, to cope with follow sets being
386             # computed after being looked up
387             # creates FSM loops for the constructs:
388             # A -> B?
389             # A -> B*
390             sub _compute_fsm {
391 48     48   64 my($self) = @_;
392            
393             # repeat until no more follow tokens added
394             # Example : A B[?*] C
395 48         63 my $changed;
396 48         56 do {
397 85         87 $changed = 0;
398            
399             # check all states in turn
400 85         94 for my $state (@{$self->_state_table}) {
  85         229  
401 571         1752 my %state_copy = %$state;
402 571         1571 while (my($token, $next_state) = each %state_copy) {
403 2207 100       8422 next unless my($subrule_name, $quant) =
404             $token =~ /^ \[ (.*) \] ( [?*] )? $/x;
405            
406             my $next_state_text = ref($next_state) eq 'HASH' ?
407             $next_state->{__state__} :
408 248 100       549 $next_state;
409            
410 248 50       676 my $subrule = $self->_tree->{$subrule_name}
411             or croak "rule $subrule_name not found";
412 248 50       457 ref($subrule) eq 'HASH' or die;
413            
414             # call subrule on each of the subrule follow set
415             # Example : add all 'follow(B) -> call B' to current rule
416 248         520 for my $subrule_key (keys %$subrule) {
417 1062 100       2747 next if $subrule_key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
418             my $text = "[ ".$subrule->{__state__}.", ".
419             (($quant||"") eq '*' ?
420             $state->{__state__} : # loop on a '*'
421 454 100 100     1950 $next_state_text # else, next state
422             )." ]";
423 454 100       710 if ($state->{$subrule_key}) {
424 307 50       690 die if $state->{$subrule_key} ne $text;
425             }
426             else {
427 147         231 $state->{$subrule_key} = $text;
428 147         208 $changed++;
429             }
430             }
431            
432             # call next rule on the next rule follow set
433             # Example : add all 'follow(C) -> end' to end current rule
434 248 100       910 if (defined($quant)) {
435 56 100       103 if ($state->{__else__}) {
436 34 50       166 die if $state->{__else__} ne $next_state_text;
437             }
438             else {
439 22         41 $state->{__else__} = $next_state_text;
440 22         85 $changed++;
441             }
442             }
443             }
444             }
445             } while ($changed);
446            
447 48         82 return;
448             }
449            
450             #------------------------------------------------------------------------------
451            
452             =head2 parse_grammar
453            
454             Parses the given grammar text and adds to the parser. Example grammar follows:
455            
456             {
457             # prolog
458             use MyLibrary;
459             }
460            
461             main : (number | name)+ ;
462             number : 'NUMBER' { $item[0][1] } ; # comment
463             name : 'NAME' { $item[0][1] } ; # comment
464            
465             expr : ;
466            
467            
468            
469             {
470             # epilog
471             sub util_method {...}
472             }
473            
474             =over 4
475            
476             =item prolog
477            
478             If the text contains a code block surrounded by braces before the first rule
479             definition, the text is copied without the external braces to the prolog
480             of generated module.
481            
482             =item epilog
483            
484             If the text contains a code block surrounded by braces after the last rule
485             definition, the text is copied without the external braces to the epilog
486             of generated module.
487            
488             =item statements
489            
490             Statements are either rule definitions of directives and end with a
491             semi-colon C<;>. Comments are as in Perl, from a hash C<#> sign to
492             the end of the line.
493            
494             =item rule
495            
496             A rule defines one sentence to match in the grammar. The first rule defined
497             is the default start rule, i.e. the rule parsed by default on the input.
498             A rule name must start with a letter and contain only letters,
499             digits and the underscore character.
500            
501             The rule definition follows after a colon and is composed of a sequence
502             of tokens (quoted strings) and sub-rules, to match in sequence. The rule matches
503             when all the tokens and sub-rules in the definition match in sequence.
504            
505             The top level rule should end with CeofE> to make sure all input
506             is parsed.
507            
508             The rule can define several alternative definitions separated by '|'.
509            
510             The rule definition finishes with a semi-colon ';'.
511            
512             A rule can call an anonymous sub-rule enclosed in parentheses.
513            
514             =item action
515            
516             The last item in the rule definition is a text delimited by {} with the code
517             to execute when the rule is matched. The code can use $self to refer to the
518             Parser object, and @item to refer to the values of each of the tokens and
519             sub-rules matched. The return value from the code defines the value of the
520             rule, passed to the upper level rule, or returned as the parse result.
521            
522             If no action is supplied, a default action returns an array reference with
523             the result of all tokens and sub-rules of the matched sentence.
524            
525             =item quantifiers
526            
527             Every token or sub-rule can be followed by a repetition specification:
528             '?' (zero or one), '*' (zero or more), '+' (one or more),
529             or '<+,>' (comma-separated list, comma can be replaced by any token).
530            
531             =item directives
532            
533             Directives are written with angle brackets.
534            
535             =over 4
536            
537             =item
538            
539             Can be used in a rule instead of the empty string to represent the end of input.
540            
541             =item
542            
543             Shortcut for creating lists of operators separated by tokens,
544             returns the list of rule and token values.
545            
546             =item
547            
548             Defines the start rule of the grammar. By default the first
549             defined rule is the start rule; use Cstart:E> to override that.
550            
551             =back
552            
553             =back
554            
555             =cut
556            
557             #------------------------------------------------------------------------------
558             sub parse_grammar {
559 38     38 1 15808 my($self, $text) = @_;
560            
561             # need to postpone load of Parse::FSM::Parser, as Parse::FSM is used by
562             # the script that creates Parse::FSM::Parser
563 38 50   23   2495 eval 'use Parse::FSM::Parser'; $@ and die; ## no critic
  38     27   141  
  6     35   120  
  14     23   54  
  14     19   35  
  7     19   182  
  8     14   48  
  8     12   15  
  8     14   906  
  9     19   53  
  9     9   17  
  9     15   304  
  11     16   66  
  11     6   20  
  11     4   517  
  12     17   62  
  12     2   23  
  12     5   982  
  11     1   60  
  11         23  
  11         1378  
  9         55  
  9         17  
  9         658  
  10         59  
  10         16  
  10         655  
  10         54  
  10         20  
  10         767  
  10         52  
  10         19  
  10         1371  
  9         52  
  9         15  
  9         703  
  8         49  
  8         17  
  8         679  
  7         36  
  7         13  
  7         663  
  6         35  
  6         12  
  6         1452  
  3         17  
  3         7  
  3         323  
  2         9  
  2         4  
  2         312  
  1         5  
  1         1  
564            
565 38         180 my $parser = Parse::FSM::Parser->new;
566 38         154 $parser->user->{fsm} = $self;
567 38         53 eval {
568 38         135 $parser->from($text); # setup lexer
569 38         121 $parser->parse;
570             };
571 38 100       591 $@ and do { $@ =~ s/\s+\z//; croak $@; };
  11         52  
  11         1000  
572            
573 27         339 return;
574             }
575            
576             #------------------------------------------------------------------------------
577            
578             =head1 METHODS - USE PARSER
579            
580             =head2 parser
581            
582             Computes the Finite State Machine to execute the parser and returns a
583             L object that implements the parser.
584            
585             Useful to build the parser and execute it in the same
586             program, but with the run-time penalty of the time to setup the state tables.
587            
588             =cut
589            
590             #------------------------------------------------------------------------------
591             sub parser {
592 42     42 1 192 my($self) = @_;
593 42   100     121 our $name ||= 'Parser00000'; $name++; # new module on each call
  42         70  
594            
595 42         125 my $text = $self->_module_text($name, "-");
596 42     13   61422 eval $text; ## no critic
  6     17   1786  
  15     12   53  
  15     8   163  
  6     18   29  
  5     9   8  
  12     10   137  
  12     5   1401  
  5     1   10  
  5     1   535  
  4     8   24  
  4     1   9  
  6     1   73  
  8     0   47  
  6         10  
  1         279  
597 42 50       127 $@ and die $@;
598            
599 42         1041 my $parser = $name->new;
600            
601 42         233 return $parser;
602             }
603             #------------------------------------------------------------------------------
604            
605             =head2 write_module
606            
607             Receives as input the module name and the output file name
608             and writes the parser module.
609            
610             The file name is optional; if not supplied is computed from the
611             module name by replacing C<::> by C and appending C<.pm>,
612             e.g. C.
613            
614             The generated code includes C functions for every rule
615             C found in the grammar, as a short-cut for calling C.
616            
617             =cut
618            
619             #------------------------------------------------------------------------------
620             sub write_module {
621 13     13 1 954 my($self, $name, $file) = @_;
622            
623 13 100       113 $name or croak "name not defined";
624            
625             # build file name from module name
626 21 100       57 unless (defined $file) {
627 12         44 $file = $name;
628 20         49 $file =~ s/::/\//g;
629 20         58 $file .= ".pm";
630             }
631            
632 17         45 my $text = $self->_module_text($name, $file);
633 16         4861 write_file($file, {atomic => 1}, $text);
634            
635 12         1265 return;
636             }
637            
638             #------------------------------------------------------------------------------
639             # template code for grammmar parser
640             my $TEMPLATE = <<'END_TEMPLATE';
641             # $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
642             # Parser generated by Parse::FSM
643            
644             package # hide from CPAN indexer
645             <% $name %>;
646            
647             use strict;
648             use warnings;
649            
650             use Parse::FSM::Driver; our @ISA = ('Parse::FSM::Driver');
651            
652             <% $prolog %>
653            
654             <% $table %>
655            
656             sub new {
657             my($class, %args) = @_;
658             return $class->SUPER::new(
659             _state_table => \@state_table,
660             _start_state => $start_state,
661             %args,
662             );
663             }
664            
665             <% $epilog %>
666            
667             1;
668             END_TEMPLATE
669            
670             #------------------------------------------------------------------------------
671             # module text
672             sub _module_text {
673 56     60   108 my($self, $name, $file) = @_;
674            
675 52 50       112 $name or croak "name not defined";
676 52 50       123 $file or croak "file not defined";
677            
678 61         141 my $table = $self->_table_dump;
679            
680 60   50     593 my @template_args = (
      50        
681             DELIMITERS => [ '<%', '%>' ],
682             HASH => {
683             prolog => $self->prolog || "",
684             epilog => $self->epilog || "",
685             name => $name,
686             table => $table,
687             },
688             );
689 64         249 return fill_in_string($TEMPLATE, @template_args);
690             }
691            
692             #------------------------------------------------------------------------------
693             # dump the state table
694             sub _table_dump {
695 55     63   94 my($self) = @_;
696            
697 54         142 $self->_compute_fsm;
698            
699             #print dump($self),"\n" if $ENV{DEBUG};
700            
701 54         90 my $start_state = 0;
702 54 100 66     310 if (defined($self->start_rule) && exists($self->_tree->{$self->start_rule})) {
703 53         172 $start_state = $self->_tree->{$self->start_rule}{__state__};
704             }
705             else {
706 15         197 croak "start state not found";
707             }
708            
709 56         181 my $ret = 'my $start_state = '.$start_state.";\n".
710             'my @state_table = ('."\n";
711 57         75 my $width;
712 57         121 for my $i (0 .. $#{$self->_state_table}) {
  59         166  
713             $ret .= "\t# [$i] " .
714 268   100     1094 ($self->_state_table->[$i]{__comment__} || "") .
715             "\n" .
716             "\t{ ";
717 266         316 $width = 2;
718            
719 266         302 for my $key (sort keys %{$self->_state_table->[$i]}) {
  267         1077  
720 989 100       2807 next if $key =~ /^(__(comment|state)__|\[.*\][?*]?)$/;
721            
722 428         812 my $value = $self->_state_table->[$i]{$key};
723 428 100       930 $value = $value->{__state__} if ref($value) eq 'HASH';
724            
725 425 100       1413 my $key_text = ($key =~ /^\w+$/) ? $key : dump($key);
726            
727 425         8956 my $item_text = "$key_text => $value, ";
728 428 100       818 if (($width += length($item_text)) > 72) {
729 22         42 $ret .= "\n\t ";
730 22         37 $width = 2 + length($item_text);
731             }
732 428         717 $ret .= $item_text;
733             }
734            
735 260         508 $ret .= "},\n\n";
736             }
737 56         106 $ret .= ");\n\n";
738            
739             # dump action
740 56         86 for (sort {$a->[0] cmp $b->[0]} values %{$self->_action}) {
  122         217  
  52         226  
741 128         256 $ret .= $_->[1];
742             }
743            
744             # dump parse_XXX functions
745 57         142 my $length = 1;
746 65         97 while (my($name, $rule) = each %{$self->_tree}) {
  222         678  
747 185 100       488 next unless $name =~ /^[a-z]/i;
748 95 100       240 $length = length($name) if length($name) > $length;
749             }
750 47         74 while (my($name, $rule) = each %{$self->_tree}) {
  215         646  
751 174 100       432 next unless $name =~ /^[a-z]/i;
752 91         373 $ret .=
753             "sub parse_$name".
754             (" " x ($length - length($name))).
755             " { return shift->_parse($rule->{__state__}) }\n";
756             }
757            
758 53         167 return $ret;
759             }
760            
761             #------------------------------------------------------------------------------
762            
763             =head1 PRE-COMPILING THE GRAMMAR
764            
765             The setup of the parsing tables and creating the parsing module may take up
766             considerable time. Therefore it is useful to separate the parser generation
767             phase from the parsing phase.
768            
769             =head2 precompile
770            
771             A parser module can be created from a yacc-like grammar file by the
772             following command. The generated file (last parameter) is optional; if not
773             supplied is computed from the module name by replacing C<::> by C and
774             appending C<.pm>, e.g. C:
775            
776             perl -MParse::FSM - Grammar.yp Parser::Module
777             perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
778            
779             This is equivalent to the following Perl program:
780            
781             #!perl
782             use Parse::FSM;
783             Parse::FSM->precompile(@ARGV);
784            
785             The class method C receives as arguments the grammar file, the
786             generated module name and an optional file name, and creates the parsing module.
787            
788             =cut
789            
790             #------------------------------------------------------------------------------
791             sub precompile {
792 12     18 1 30 my($class, $grammar, $module, $file) = @_;
793            
794 12         36 my $self = $class->new;
795 7         14 my $text = read_file($grammar);
796 6         21 $self->parse_grammar($text);
797 4         12 $self->write_module($module, $file);
798            
799 21         49 return;
800             }
801            
802             #------------------------------------------------------------------------------
803             # startup code for pre-compiler
804             # borrowed from Parse::RecDescent
805             sub import {
806 23     11   61 local *_die = sub { warn @_, "\n"; exit 1; };
  19     18   49  
  21         145  
807            
808 14         46 my($package, $file, $line) = caller;
809 16 50 33     82 if (substr($file,0,1) eq '-' && $line == 0) {
810 14 0 0     38 _die("Usage: perl -MParse::FSM - GRAMMAR MODULE::NAME [MODULE/NAME.pm]")
811             unless @ARGV == 2 || @ARGV == 3;
812            
813 12         38 my($grammar, $module, $file) = @ARGV;
814 10         25 eval {
815 10         31 Parse::FSM->precompile($grammar, $module, $file);
816             };
817 1 0       7 $@ and _die($@);
818            
819 1         3 exit 0;
820             }
821            
822 7         264 return;
823             }
824            
825             #------------------------------------------------------------------------------
826            
827            
828             =head1 AUTHOR
829            
830             Paulo Custodio, C<< >>
831            
832             =head1 ACKNOWLEDGEMENTS
833            
834             Calling pre-compiler on C
835             borrowed from L.
836            
837             =head1 BUGS and FEEDBACK
838            
839             Please report any bugs or feature requests through the web interface at
840             L.
841            
842             =head1 LICENSE and COPYRIGHT
843            
844             Copyright (C) 2010-2011 Paulo Custodio.
845            
846             This program is free software; you can redistribute it and/or modify it
847             under the terms of either: the GNU General Public License as published
848             by the Free Software Foundation; or the Artistic License.
849            
850             See http://dev.perl.org/licenses/ for more information.
851            
852             =cut
853            
854             1; # End of Parse::FSM