File Coverage

blib/lib/Parse/FSM.pm

Criterion	Covered	Total	%
statement	276	277	99.6
branch	80	96	83.3
condition	25	33	75.7
subroutine	55	56	98.2
pod	6	6	100.0
total	442	468	94.4

line	stmt	bran	cond	sub	pod	time	code
1							# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
2
3							package Parse::FSM;
4
5							#------------------------------------------------------------------------------
6
7							=head1 NAME
8
9							Parse::FSM - Deterministic top-down parser based on a Finite State Machine
10
11							=cut
12
13							#------------------------------------------------------------------------------
14
15	7			7		24387	use strict;
	7					14
	7					178
16	7			7		34	use warnings;
	7					9
	7					216
17
18	7			7		27	use Carp; our @CARP_NOT = ('Parse::FSM');
	7					15
	7					768
19	7			7		3583	use Data::Dump 'dump';
	7					28937
	7					439
20	7			7		6071	use Text::Template 'fill_in_string';
	7					20903
	7					410
21	7			7		5151	use File::Slurp;
	7					95488
	7					862
22
23							our $VERSION = '1.12';
24
25							#------------------------------------------------------------------------------
26
27							=head1 SYNOPSIS
28
29							use Parse::FSM;
30							$fsm = Parse::FSM->new;
31
32							$fsm->prolog($text);
33							$fsm->epilog($text);
34							$fsm->add_rule($name, @elems, $action);
35							$fsm->start_rule($name);
36
37							$fsm->parse_grammar($text);
38
39							$fsm->write_module($module);
40							$fsm->write_module($module, $file);
41
42							$parser = $fsm->parser; # isa Parse::FSM::Driver
43							$parser->input(\&lexer);
44							$result = $parser->parse;
45
46							# script
47							perl -MParse::FSM - Grammar.yp Parser::Module
48							perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
49
50							=head1 DESCRIPTION
51
52							This module compiles the Finite State Machine used by the
53							L parser module.
54
55							It can be used by a sequence of C calls, or by parsing a yacc-like
56							grammar in one go with C.
57
58							It can be used as a script to generate a module from a grammar file.
59
60							The result of compiling the parser can be used immediately by retrieving the
61							C object, or a pre-compiled module can be written to disk by
62							C. This module can then be used by the client code of the parser.
63
64							As usual in top-down parsers, left recursion is not supported
65							and generates an infinite loop. This parser is deterministic and does not implement backtracking.
66
67							=head1 METHODS - SETUP
68
69							=head2 new
70
71							Creates a new object.
72
73							=cut
74
75							#------------------------------------------------------------------------------
76							use Class::XSAccessor {
77	6					55	constructor => '_init',
78							accessors => [
79							'_tree', # parse tree
80							# Contains nested HASH tables with the decision tree
81							# used during parsing.
82							# Each node maps:
83							# token => next node / string with action code
84							# [subrule] => next node / string with action code
85							# [subrule]? => next node / string with action code
86							# [subrule]* => next node / string with action code
87							# __else__ => next node / string with action code
88							# The first level are the rule names.
89
90							'_state_table', # ARRAY that maps each state ID to the corresponding
91							# HASH table from tree.
92							# Copied to the generated parser module.
93
94							'_action', # map func text => [ sub name, sub text ]
95
96							'start_rule', # name start rule
97							'prolog', # code to include near the beginning of the file
98							'epilog', # code to include at the end of the file
99							'_names', # keep all generated names up to now, to be able to
100							# create unique ones
101							],
102	6			6		4386	};
	6					15383
103
104							#------------------------------------------------------------------------------
105							sub new {
106	70			70	1	40020	my($class) = @_;
107	70					497	return $class->_init(_tree => {}, _state_table => [], _action => {},
108							_names => {});
109							}
110
111							#------------------------------------------------------------------------------
112							# create a new unique name (for actions, sub-rules)
113							sub _unique_name {
114	153			153		216	my($self, $name) = @_;
115	153					188	my $id = 1;
116	153					509	while (exists $self->_names->{$name.$id}) {
117	11					33	$id++;
118							}
119	153					389	$self->_names->{$name.$id}++;
120	153					357	return $name.$id;
121							}
122
123							#------------------------------------------------------------------------------
124
125							=head1 METHODS - BUILD GRAMMAR
126
127							=head2 start_rule
128
129							Name of the grammar start rule. It defaults to the first rule added by C.
130
131							=head2 prolog, epilog
132
133							Perl code to include in the generated module near the start of the generated
134							module and near the end of it.
135
136							=head2 add_rule
137
138							Adds one rule to the parser.
139
140							$fsm->add_rule($name, @elems, $action);
141
142							C<$name> is the name of the rule, i.e. the syntactic object recognized
143							by the rule.
144
145							C<@elems> is the list of elements in sequence needed to recognize this rule.
146							Each element can be one of:
147
148							=over 4
149
150							=item *
151
152							A string that will match with that token type from the lexer.
153
154							The empty string is used to match the end of input and should
155							be present in the grammar to force the parser
156							to accept all the input;
157
158							=item *
159
160							An array reference of a list of all possible tokens to accept at this position.
161
162							=item *
163
164							A subrule name inside square brackets, optionally followed by a
165							repetition character that asks the parser to recursively descend
166							to match that subrule at the current input location.
167
168							The accepted forms are:
169
170							C<[term]> - recurse to the term rule;
171
172							C<[term]?> - term is optional;
173
174							C<[term]*> - accept zero or more terms;
175
176							C<[term]+> - accept one or more terms;
177
178							C<[term]E+,E> - accept one or more terms separated by commas,
179							any token type can be used instead of the comma;
180
181							=back
182
183							C<$action> is the Perl text of the action executed when the rule is recognized,
184							i.e. all elements were found in sequence.
185
186							It has to be enclosed in brackets C<{}>, and can use the following lexical
187							variables that are declared by the generated code:
188
189							=over 4
190
191							=item *
192
193							C<$self> : object pointer;
194
195							=item *
196
197							C<@item> : values of all the tokens or rules identified in this rule. The subrule
198							call with repetitions return an array reference containing all the found items
199							in the subrule;
200
201							=back
202
203							=cut
204
205							#------------------------------------------------------------------------------
206							# add_rule
207							# Args:
208							# rule name
209							# list of : '[rule]' '[rule]*' '[rule]?' '[rule]+' '[rule]<+SEP>' # subrules
210							# token # tokens
211							# action : '{ CODE }'
212							sub add_rule {
213	151			151	1	16838	my($self, $rule_name, @elems) = @_;
214	151					447	my $action = pop(@elems);
215
216	151	100				482	@elems or croak "missing arguments";
217	150	50				526	$rule_name =~ /^\w+$/ or croak "invalid rule name ".dump($rule_name);
218
219							# check for array-ref @elem and recurse for all alternatives
220	150					328	for my $i (0 .. $#elems) {
221	259	100				577	if (ref($elems[$i])) { # isa 'ARRAY', others cause run-time error
222	2					5	for (@{$elems[$i]}) {
	2					5
223	4					26	$self->add_rule($rule_name,
224							@elems[0 .. $i-1], $_, @elems[$i+1 .. $#elems],
225							$action);
226							}
227	2					7	return;
228							}
229							}
230
231	148					793	$self->_check_start_rule($rule_name);
232
233							# load the tree
234	148					237	my $tree = $self->_tree;
235	148					291	$tree = $self->_add_tree_node($tree, $rule_name); # load rule name
236
237	148					738	my $comment = "$rule_name :";
238
239	148					514	while (@elems) {
240	252					416	my $elem = shift @elems;
241
242							# handle subrule calls with quantifiers
243							# check if recursing for _add_list_rule
244	252	100	100			2056	if ($rule_name !~ /^_lst_/ &&
245							$elem =~ /^ \[ .* \] /x) {
246	68					157	$elem = $self->_add_list_rule($elem);
247							}
248
249	252					718	$tree->{__comment__} = $comment; # way up to this state
250
251	252	100				823	$comment .= " ".($elem =~ /^\[/ ? $elem : dump($elem));
252
253	252	100				16641	if (@elems) { # not a leaf node
254							croak "leaf and node at ($comment)"
255	105	100	100			434	if (exists($tree->{$elem}) && ref($tree->{$elem}) ne 'HASH');
256	104					428	$tree = $self->_add_tree_node($tree, $elem); # load token
257							}
258							else { # leaf node
259							croak "leaf not unique at ($comment)"
260	147	100				406	if (exists($tree->{$elem}));
261	146					289	$self->_add_tree_node($tree, $elem); # create node
262	146					506	$tree->{$elem} = $self->_add_action($action, $rule_name, $comment);
263							}
264							}
265
266	142					413	return;
267							}
268
269							#------------------------------------------------------------------------------
270							# add a list subrule, get passed a string '[subrule]*'
271							sub _add_list_rule {
272	68			68		138	my($self, $elem) = @_;
273
274	68	50				292	$elem =~ /^ \[ (\w+) \] ( [?+] \| <\+.> )? $/x
275							or croak "invalid subrule call $elem";
276	68					395	my($subrule, $quant) = ($1, $2);
277
278	68	100				423	return "[$subrule]" unless $quant; # subrule without quatifier
279
280							# create a list subrule, so that the result of the repetition is returned
281							# as an array reference
282	22					153	my $list_subrule = $self->_unique_name("_lst_".$subrule);
283
284	22	100	100			574	if ($quant eq '*' \|\| $quant eq '?') {
		100
		50
285	12					254	$self->add_rule($list_subrule, "[$subrule]$quant",
286							'{ return \@item }');
287							}
288							elsif ($quant eq '+') { # A+ -> A A*
289	5					27	$self->add_rule($list_subrule, "[$subrule]", "[$subrule]*",
290							'{ return \@item }');
291							}
292							elsif ($quant =~ /^< \+ (.) >$/x) { # A<+;> -> A Ac ; Ac : ';' A
293	5					12	my $separator = $1;
294	5					17	my $list_subrule_cont = $self->_unique_name("_lst_".$subrule);
295
296							# Ac : ';' A
297	5					21	$self->add_rule($list_subrule_cont, $separator, "[$subrule]",
298							'{ return $item[1] }');
299
300							# A Ac*
301	5					27	$self->add_rule($list_subrule, "[$subrule]", "[$list_subrule_cont]*",
302							'{ return \@item }');
303							}
304							else {
305	0					0	die; # not reached
306							}
307
308	22					55	return "[$list_subrule]";
309							}
310
311							#------------------------------------------------------------------------------
312							# add a tree node and create a new state
313							sub _add_tree_node {
314	398			398		601	my($self, $tree, $elem) = @_;
315
316	398		100			1527	$tree->{$elem} \|\|= {};
317
318							# new state?
319	398	100				857	if (! exists $tree->{__state__}) {
320	278					483	my $id = scalar(@{$self->_state_table});
	278					803
321	278					446	$tree->{__state__} = $id;
322	278					584	$self->_state_table->[$id] = $tree;
323							}
324
325	398					1213	return $tree->{$elem};
326							}
327
328							#------------------------------------------------------------------------------
329							# define start rule, except if starting with '_' (internal)
330							sub _check_start_rule {
331	151			151		1781	my($self, $rule_name) = @_;
332
333	151	100	100			911	if (! defined $self->start_rule && $rule_name =~ /^[a-z]/i) {
334	58					148	$self->start_rule($rule_name); # start rule is first defined rule
335							}
336
337	151					234	return;
338							}
339
340							#------------------------------------------------------------------------------
341							# _add_action()
342							# Create a new action or re-use an existing one. An action has to start by
343							# '{'; a new name is created and a reference to the name is
344							# returned : "\&_action_RULE"
345							sub _add_action {
346	146			146		251	my($self, $action, $rule_name, $comment) = @_;
347
348							# remove braces
349	146	100				1871	$action =~ s/ \A \s* \{ \s* (.?) \s \} \s* \z /$1/xs
350							or croak "action must be enclosed in {}";
351
352							# reuse an existing action, if any
353	142					1880	(my $cannon_action = $action) =~ s/\s+//g;
354	142	100				409	if (!$self->_action->{$cannon_action}) {
355	126					359	my $action_name = $self->_unique_name("_act_".$rule_name);
356
357							# reduce indentation
358	126					245	for ($action) {
359	126					451	my($lead_space) = /^(\t+)/m;
360	126	100				459	$lead_space and s/^$lead_space/\t/gm;
361							}
362
363							$action =
364	126	100				561	"# $comment\n".
365							"sub $action_name {".
366							($action ne '' ? "\n\tmy(\$self, \@item) = \@_;\n\t" : "").
367							$action.
368							"\n}\n\n";
369
370	126					693	$self->_action->{$cannon_action} = [ $action_name, $action ];
371							}
372							else {
373							# append this comment
374	16					123	$self->_action->{$cannon_action}[1] =~ s/^(sub)/# $comment\n$1/m;
375							}
376
377	142					804	return "\\&".$self->_action->{$cannon_action}[0];
378							}
379
380							#------------------------------------------------------------------------------
381							# compute the FSM machine
382							#
383							# expand [rule] calls into start_set(rule) => [ rule_id, next_state ]
384							# Search for all sub-rule calls, and add each of the first tokens of the subrule
385							# to the call. Repeat until no more rules added, to cope with follow sets being
386							# computed after being looked up
387							# creates FSM loops for the constructs:
388							# A -> B?
389							# A -> B*
390							sub _compute_fsm {
391	48			48		64	my($self) = @_;
392
393							# repeat until no more follow tokens added
394							# Example : A B[?*] C
395	48					63	my $changed;
396	48					56	do {
397	85					87	$changed = 0;
398
399							# check all states in turn
400	85					94	for my $state (@{$self->_state_table}) {
	85					229
401	571					1752	my %state_copy = %$state;
402	571					1571	while (my($token, $next_state) = each %state_copy) {
403	2207	100				8422	next unless my($subrule_name, $quant) =
404							$token =~ /^ \[ (.) \] ( [?] )? $/x;
405
406							my $next_state_text = ref($next_state) eq 'HASH' ?
407							$next_state->{__state__} :
408	248	100				549	$next_state;
409
410	248	50				676	my $subrule = $self->_tree->{$subrule_name}
411							or croak "rule $subrule_name not found";
412	248	50				457	ref($subrule) eq 'HASH' or die;
413
414							# call subrule on each of the subrule follow set
415							# Example : add all 'follow(B) -> call B' to current rule
416	248					520	for my $subrule_key (keys %$subrule) {
417	1062	100				2747	next if $subrule_key =~ /^(__(comment\|state)__\|\[.\][?]?)$/;
418							my $text = "[ ".$subrule->{__state__}.", ".
419							(($quant\|\|"") eq '*' ?
420							$state->{__state__} : # loop on a '*'
421	454	100	100			1950	$next_state_text # else, next state
422							)." ]";
423	454	100				710	if ($state->{$subrule_key}) {
424	307	50				690	die if $state->{$subrule_key} ne $text;
425							}
426							else {
427	147					231	$state->{$subrule_key} = $text;
428	147					208	$changed++;
429							}
430							}
431
432							# call next rule on the next rule follow set
433							# Example : add all 'follow(C) -> end' to end current rule
434	248	100				910	if (defined($quant)) {
435	56	100				103	if ($state->{__else__}) {
436	34	50				166	die if $state->{__else__} ne $next_state_text;
437							}
438							else {
439	22					41	$state->{__else__} = $next_state_text;
440	22					85	$changed++;
441							}
442							}
443							}
444							}
445							} while ($changed);
446
447	48					82	return;
448							}
449
450							#------------------------------------------------------------------------------
451
452							=head2 parse_grammar
453
454							Parses the given grammar text and adds to the parser. Example grammar follows:
455
456							{
457							# prolog
458							use MyLibrary;
459							}
460
461							main : (number \| name)+ ;
462							number : 'NUMBER' { $item[0][1] } ; # comment
463							name : 'NAME' { $item[0][1] } ; # comment
464
465							expr : ;
466
467
468
469							{
470							# epilog
471							sub util_method {...}
472							}
473
474							=over 4
475
476							=item prolog
477
478							If the text contains a code block surrounded by braces before the first rule
479							definition, the text is copied without the external braces to the prolog
480							of generated module.
481
482							=item epilog
483
484							If the text contains a code block surrounded by braces after the last rule
485							definition, the text is copied without the external braces to the epilog
486							of generated module.
487
488							=item statements
489
490							Statements are either rule definitions of directives and end with a
491							semi-colon C<;>. Comments are as in Perl, from a hash C<#> sign to
492							the end of the line.
493
494							=item rule
495
496							A rule defines one sentence to match in the grammar. The first rule defined
497							is the default start rule, i.e. the rule parsed by default on the input.
498							A rule name must start with a letter and contain only letters,
499							digits and the underscore character.
500
501							The rule definition follows after a colon and is composed of a sequence
502							of tokens (quoted strings) and sub-rules, to match in sequence. The rule matches
503							when all the tokens and sub-rules in the definition match in sequence.
504
505							The top level rule should end with CeofE> to make sure all input
506							is parsed.
507
508							The rule can define several alternative definitions separated by '\|'.
509
510							The rule definition finishes with a semi-colon ';'.
511
512							A rule can call an anonymous sub-rule enclosed in parentheses.
513
514							=item action
515
516							The last item in the rule definition is a text delimited by {} with the code
517							to execute when the rule is matched. The code can use $self to refer to the
518							Parser object, and @item to refer to the values of each of the tokens and
519							sub-rules matched. The return value from the code defines the value of the
520							rule, passed to the upper level rule, or returned as the parse result.
521
522							If no action is supplied, a default action returns an array reference with
523							the result of all tokens and sub-rules of the matched sentence.
524
525							=item quantifiers
526
527							Every token or sub-rule can be followed by a repetition specification:
528							'?' (zero or one), '*' (zero or more), '+' (one or more),
529							or '<+,>' (comma-separated list, comma can be replaced by any token).
530
531							=item directives
532
533							Directives are written with angle brackets.
534
535							=over 4
536
537							=item
538
539							Can be used in a rule instead of the empty string to represent the end of input.
540
541							=item
542
543							Shortcut for creating lists of operators separated by tokens,
544							returns the list of rule and token values.
545
546							=item
547
548							Defines the start rule of the grammar. By default the first
549							defined rule is the start rule; use Cstart:E> to override that.
550
551							=back
552
553							=back
554
555							=cut
556
557							#------------------------------------------------------------------------------
558							sub parse_grammar {
559	38			38	1	15808	my($self, $text) = @_;
560
561							# need to postpone load of Parse::FSM::Parser, as Parse::FSM is used by
562							# the script that creates Parse::FSM::Parser
563	38	50		23		2495	eval 'use Parse::FSM::Parser'; $@ and die; ## no critic
	38			27		141
	6			35		120
	14			23		54
	14			19		35
	7			19		182
	8			14		48
	8			12		15
	8			14		906
	9			19		53
	9			9		17
	9			15		304
	11			16		66
	11			6		20
	11			4		517
	12			17		62
	12			2		23
	12			5		982
	11			1		60
	11					23
	11					1378
	9					55
	9					17
	9					658
	10					59
	10					16
	10					655
	10					54
	10					20
	10					767
	10					52
	10					19
	10					1371
	9					52
	9					15
	9					703
	8					49
	8					17
	8					679
	7					36
	7					13
	7					663
	6					35
	6					12
	6					1452
	3					17
	3					7
	3					323
	2					9
	2					4
	2					312
	1					5
	1					1
564
565	38					180	my $parser = Parse::FSM::Parser->new;
566	38					154	$parser->user->{fsm} = $self;
567	38					53	eval {
568	38					135	$parser->from($text); # setup lexer
569	38					121	$parser->parse;
570							};
571	38	100				591	$@ and do { $@ =~ s/\s+\z//; croak $@; };
	11					52
	11					1000
572
573	27					339	return;
574							}
575
576							#------------------------------------------------------------------------------
577
578							=head1 METHODS - USE PARSER
579
580							=head2 parser
581
582							Computes the Finite State Machine to execute the parser and returns a
583							L object that implements the parser.
584
585							Useful to build the parser and execute it in the same
586							program, but with the run-time penalty of the time to setup the state tables.
587
588							=cut
589
590							#------------------------------------------------------------------------------
591							sub parser {
592	42			42	1	192	my($self) = @_;
593	42		100			121	our $name \|\|= 'Parser00000'; $name++; # new module on each call
	42					70
594
595	42					125	my $text = $self->_module_text($name, "-");
596	42			13		61422	eval $text; ## no critic
	6			17		1786
	15			12		53
	15			8		163
	6			18		29
	5			9		8
	12			10		137
	12			5		1401
	5			1		10
	5			1		535
	4			8		24
	4			1		9
	6			1		73
	8			0		47
	6					10
	1					279
597	42	50				127	$@ and die $@;
598
599	42					1041	my $parser = $name->new;
600
601	42					233	return $parser;
602							}
603							#------------------------------------------------------------------------------
604
605							=head2 write_module
606
607							Receives as input the module name and the output file name
608							and writes the parser module.
609
610							The file name is optional; if not supplied is computed from the
611							module name by replacing C<::> by C and appending C<.pm>,
612							e.g. C.
613
614							The generated code includes C functions for every rule
615							C found in the grammar, as a short-cut for calling C.
616
617							=cut
618
619							#------------------------------------------------------------------------------
620							sub write_module {
621	13			13	1	954	my($self, $name, $file) = @_;
622
623	13	100				113	$name or croak "name not defined";
624
625							# build file name from module name
626	21	100				57	unless (defined $file) {
627	12					44	$file = $name;
628	20					49	$file =~ s/::/\//g;
629	20					58	$file .= ".pm";
630							}
631
632	17					45	my $text = $self->_module_text($name, $file);
633	16					4861	write_file($file, {atomic => 1}, $text);
634
635	12					1265	return;
636							}
637
638							#------------------------------------------------------------------------------
639							# template code for grammmar parser
640							my $TEMPLATE = <<'END_TEMPLATE';
641							# $Id: FSM.pm,v 1.10 2013/07/27 00:34:39 Paulo Exp $
642							# Parser generated by Parse::FSM
643
644							package # hide from CPAN indexer
645							<% $name %>;
646
647							use strict;
648							use warnings;
649
650							use Parse::FSM::Driver; our @ISA = ('Parse::FSM::Driver');
651
652							<% $prolog %>
653
654							<% $table %>
655
656							sub new {
657							my($class, %args) = @_;
658							return $class->SUPER::new(
659							_state_table => \@state_table,
660							_start_state => $start_state,
661							%args,
662							);
663							}
664
665							<% $epilog %>
666
667							1;
668							END_TEMPLATE
669
670							#------------------------------------------------------------------------------
671							# module text
672							sub _module_text {
673	56			60		108	my($self, $name, $file) = @_;
674
675	52	50				112	$name or croak "name not defined";
676	52	50				123	$file or croak "file not defined";
677
678	61					141	my $table = $self->_table_dump;
679
680	60		50			593	my @template_args = (
			50
681							DELIMITERS => [ '<%', '%>' ],
682							HASH => {
683							prolog => $self->prolog \|\| "",
684							epilog => $self->epilog \|\| "",
685							name => $name,
686							table => $table,
687							},
688							);
689	64					249	return fill_in_string($TEMPLATE, @template_args);
690							}
691
692							#------------------------------------------------------------------------------
693							# dump the state table
694							sub _table_dump {
695	55			63		94	my($self) = @_;
696
697	54					142	$self->_compute_fsm;
698
699							#print dump($self),"\n" if $ENV{DEBUG};
700
701	54					90	my $start_state = 0;
702	54	100	66			310	if (defined($self->start_rule) && exists($self->_tree->{$self->start_rule})) {
703	53					172	$start_state = $self->_tree->{$self->start_rule}{__state__};
704							}
705							else {
706	15					197	croak "start state not found";
707							}
708
709	56					181	my $ret = 'my $start_state = '.$start_state.";\n".
710							'my @state_table = ('."\n";
711	57					75	my $width;
712	57					121	for my $i (0 .. $#{$self->_state_table}) {
	59					166
713							$ret .= "\t# [$i] " .
714	268		100			1094	($self->_state_table->[$i]{__comment__} \|\| "") .
715							"\n" .
716							"\t{ ";
717	266					316	$width = 2;
718
719	266					302	for my $key (sort keys %{$self->_state_table->[$i]}) {
	267					1077
720	989	100				2807	next if $key =~ /^(__(comment\|state)__\|\[.\][?]?)$/;
721
722	428					812	my $value = $self->_state_table->[$i]{$key};
723	428	100				930	$value = $value->{__state__} if ref($value) eq 'HASH';
724
725	425	100				1413	my $key_text = ($key =~ /^\w+$/) ? $key : dump($key);
726
727	425					8956	my $item_text = "$key_text => $value, ";
728	428	100				818	if (($width += length($item_text)) > 72) {
729	22					42	$ret .= "\n\t ";
730	22					37	$width = 2 + length($item_text);
731							}
732	428					717	$ret .= $item_text;
733							}
734
735	260					508	$ret .= "},\n\n";
736							}
737	56					106	$ret .= ");\n\n";
738
739							# dump action
740	56					86	for (sort {$a->[0] cmp $b->[0]} values %{$self->_action}) {
	122					217
	52					226
741	128					256	$ret .= $_->[1];
742							}
743
744							# dump parse_XXX functions
745	57					142	my $length = 1;
746	65					97	while (my($name, $rule) = each %{$self->_tree}) {
	222					678
747	185	100				488	next unless $name =~ /^[a-z]/i;
748	95	100				240	$length = length($name) if length($name) > $length;
749							}
750	47					74	while (my($name, $rule) = each %{$self->_tree}) {
	215					646
751	174	100				432	next unless $name =~ /^[a-z]/i;
752	91					373	$ret .=
753							"sub parse_$name".
754							(" " x ($length - length($name))).
755							" { return shift->_parse($rule->{__state__}) }\n";
756							}
757
758	53					167	return $ret;
759							}
760
761							#------------------------------------------------------------------------------
762
763							=head1 PRE-COMPILING THE GRAMMAR
764
765							The setup of the parsing tables and creating the parsing module may take up
766							considerable time. Therefore it is useful to separate the parser generation
767							phase from the parsing phase.
768
769							=head2 precompile
770
771							A parser module can be created from a yacc-like grammar file by the
772							following command. The generated file (last parameter) is optional; if not
773							supplied is computed from the module name by replacing C<::> by C and
774							appending C<.pm>, e.g. C:
775
776							perl -MParse::FSM - Grammar.yp Parser::Module
777							perl -MParse::FSM - Grammar.yp Parser::Module lib\Parser\Module.pm
778
779							This is equivalent to the following Perl program:
780
781							#!perl
782							use Parse::FSM;
783							Parse::FSM->precompile(@ARGV);
784
785							The class method C receives as arguments the grammar file, the
786							generated module name and an optional file name, and creates the parsing module.
787
788							=cut
789
790							#------------------------------------------------------------------------------
791							sub precompile {
792	12			18	1	30	my($class, $grammar, $module, $file) = @_;
793
794	12					36	my $self = $class->new;
795	7					14	my $text = read_file($grammar);
796	6					21	$self->parse_grammar($text);
797	4					12	$self->write_module($module, $file);
798
799	21					49	return;
800							}
801
802							#------------------------------------------------------------------------------
803							# startup code for pre-compiler
804							# borrowed from Parse::RecDescent
805							sub import {
806	23			11		61	local *_die = sub { warn @_, "\n"; exit 1; };
	19			18		49
	21					145
807
808	14					46	my($package, $file, $line) = caller;
809	16	50	33			82	if (substr($file,0,1) eq '-' && $line == 0) {
810	14	0	0			38	_die("Usage: perl -MParse::FSM - GRAMMAR MODULE::NAME [MODULE/NAME.pm]")
811							unless @ARGV == 2 \|\| @ARGV == 3;
812
813	12					38	my($grammar, $module, $file) = @ARGV;
814	10					25	eval {
815	10					31	Parse::FSM->precompile($grammar, $module, $file);
816							};
817	1	0				7	$@ and _die($@);
818
819	1					3	exit 0;
820							}
821
822	7					264	return;
823							}
824
825							#------------------------------------------------------------------------------
826
827
828							=head1 AUTHOR
829
830							Paulo Custodio, C<< >>
831
832							=head1 ACKNOWLEDGEMENTS
833
834							Calling pre-compiler on C
835							borrowed from L.
836
837							=head1 BUGS and FEEDBACK
838
839							Please report any bugs or feature requests through the web interface at
840							L.
841
842							=head1 LICENSE and COPYRIGHT
843
844							Copyright (C) 2010-2011 Paulo Custodio.
845
846							This program is free software; you can redistribute it and/or modify it
847							under the terms of either: the GNU General Public License as published
848							by the Free Software Foundation; or the Artistic License.
849
850							See http://dev.perl.org/licenses/ for more information.
851
852							=cut
853
854							1; # End of Parse::FSM