File Coverage

blib/lib/Parse/YALALR/Read.pm

Criterion	Covered	Total	%
statement	3	210	1.4
branch	0	130	0.0
condition	0	12	0.0
subroutine	1	13	7.6
pod	0	12	0.0
total	4	377	1.0

line	stmt	bran	cond	sub	pod	time	code
1	1			1		5	use strict;
	1					1
	1					3381
2
3							package Parse::YALALR::Read;
4
5							#BEGIN { $SIG{INT} = sub { use Carp; confess "interrupted" } };
6
7							sub read {
8	0			0	0		my ($class, $lang, $input) = @_;
9	0		0				my $self = bless { language => $lang }, ref $class \|\| $class;
10	0						$self->read_parser($input);
11							}
12
13							sub read_parser {
14	0			0	0		my ($self, $input) = @_;
15
16	0						my ($pre, $rules, $post);
17
18							# Prolog
19	0						$pre = eval { $self->read_pre($input); };
	0
20	0	0					die "Parse error: $@" if $@;
21
22							# Main body
23	0						$rules = eval { $self->read_main_body($input); };
	0
24	0	0					die "Parse error: $@" if $@;
25
26	0						while (<$input>) {
27	0						$post .= $_;
28							}
29
30	0						$self->{pre} = $pre;
31	0						$self->{rules} = $rules;
32	0						$self->{post} = $post;
33
34	0						return $self;
35							}
36
37							sub read_pre {
38	0			0	0		my ($self, $input) = @_;
39	0						my $line = <$input>;
40
41	0						my $pre = "";
42
43	0						my $state = 'directives';
44
45	0						while (1) {
46	0	0					die "Parse error: Premature end of file at line $." if !defined $line;
47	0	0					last if $line =~ /^\%\%/;
48	0						$line =~ s/^\s+//;
49
50							# Skip blank lines (often caused by something below)
51	0	0					if ($line !~ /\S/) {
		0
		0
		0
		0
		0
		0
52	0						$line = <$input>;
53
54							# Discard comments
55							} elsif ($line =~ /^\/\*/) {
56	0						my $startline = $.;
57	0	0					$self->read_until(\$line, $input, '\*\/')
58							or die "Parse error: EOF in comment starting at line $startline";
59
60							# Copy over any %{ ... %} sections
61							} elsif ($line eq "\%{\n") {
62	0						$line = <$input>;
63	0	0					my $rest = $self->read_until(\$line, $input, '\%\}')
64							or die "Parse error: EOF looking for %}";
65							# Don't collect final %}
66	0						$pre .= substr($rest, 0, -2);
67
68							# Handle token declarations
69							} elsif ($line =~ /^\%(left\|right\|nonassoc\|token\|term)/g) {
70	0						my $associativity = $1;
71	0						$line = substr($line, 1 + pos($line));
72	0						$self->read_tokens(\$line, $input, $associativity);
73
74							# Handle symbol type declarations
75							} elsif ($line =~ /^\%type\s+\<.*?\>/g) {
76	0						my $type = $1;
77	0						$line = substr($line, pos($line));
78	0						my @symbols = $self->read_symbols(\$line, $input);
79	0						$self->{symbol_type}{$_} = $type foreach (@symbols);
80	0	0					if ($line =~ /^;/) { $line = substr($line, 1) };
	0
81
82							# Handle %start declaration
83							} elsif ($line =~ /^\%start\s+(.*)/) {
84	0	0					die "Parse error line $.: \%start redefines start symbol (was $self->{start_symbol})"
85							if exists $self->{start_symbol};
86	0						$self->{start_symbol} = $1;
87	0						$line = <$input>;
88
89							# Handle %union declaration
90							} elsif ($line =~ /^\%union/) {
91	0						my $union = $self->read_until(\$line, $input, '\{');
92							# print STDERR "$union\n";
93	0						my $depth = 1;
94	0						while ($depth) {
95	0						my $u;
96	0						$union .= ($u = $self->read_until(\$line, $input, '[\{\}]'));
97							# print STDERR "$u";
98	0	0					if ($u =~ /\{$/) {
		0
99	0						$depth++;
100							} elsif ($u =~ /\}$/) {
101	0						$depth--;
102							} else {
103	0						die "Parse error: EOF in %union declaration";
104							}
105							# print STDERR "\n";
106							}
107	0						$self->skip_ws(\$line, $input);
108	0						$line =~ s/^;//;
109	0						$self->{value_union} = $union;
110
111							# Die on anything else
112							} else {
113	0						die "Parse error: Unrecognized directive in line $.: $line";
114							}
115							}
116
117	0						return $pre;
118							}
119
120							sub read_tokens {
121	0			0	0		my ($self, $line, $input, $associativity) = @_;
122
123	0	0					$associativity = 'token' if $associativity eq 'term';
124
125	0						my $type = '';
126
127	0	0					if ($$line =~ s/^\s(\<.?\>)//) {
128	0						$type = $1;
129	0						$self->skip_ws($line, $input);
130							}
131
132	0						my @tokens = $self->read_symbols($line, $input);
133
134	0						$self->{token_type}{$_} = $type foreach (@tokens);
135	0						push(@{$self->{tokens}}, @tokens);
	0
136	0						push(@{$self->{precedence}}, [ $associativity, \@tokens ]);
	0
137	0						return 1;
138							}
139
140							# Read a bunch of symbols -- tokens or nonterminals -- up to the next % or ;
141							sub read_symbols {
142	0			0	0		my ($self, $line, $input) = @_;
143
144	0						my @symbols;
145	0						while (1) {
146	0						$self->skip_ws($line, $input);
147	0	0					last if ($$line =~ /^[\%;]/);
148	0						my $symbol = $self->read_symbol($line, $input);
149	0						push(@symbols, $symbol);
150							}
151
152	0						return @symbols;
153							}
154
155							sub read_main_body {
156	0			0	0		my ($self, $input) = @_;
157	0						my $line = <$input>;
158	0	0					die "Parse error: Premature EOF in main body at line $." if !defined $line;
159
160	0						my @rules;
161	0						while (1) {
162	0						my @ruleset = $self->read_ruleset(\$line, $input);
163	0	0					last if @ruleset == 0;
164	0						push(@rules, @ruleset);
165							}
166
167	0						return \@rules;
168							}
169
170							# Returns : ( [ lhs, [ symbol \| code ] ] )
171							sub read_ruleset {
172	0			0	0		my ($self, $line, $input) = @_;
173
174	0						my @rules;
175
176	0	0					defined $self->skip_ws($line, $input)
177							or return ();
178
179	0	0					return () if $$line =~ /^\%/;
180	0						my $lhs = $self->read_symbol($line, $input);
181
182	0						$self->skip_ws($line, $input);
183
184	0	0					die "Parse error: colon expected in production at line $."
185							if ($$line !~ /^:/);
186	0						$$line = substr($$line, 1);
187
188	0						my $sawnext;
189	0						while (1) {
190	0						my @rhs;
191	0						my $precedence_progenitor = undef; # Paranoia
192
193	0	0					defined $self->skip_ws($line, $input)
194							or last;
195
196	0						while (1) {
197							# print "LINE=$$line\n";
198	0						my $startline = $.;
199	0	0					if ($$line =~ /^:/) {
		0
		0
		0
		0
		0
		0
		0
		0
200							# Saw next rule; back up one symbol and remember we're done
201	0						$$line = pop(@rhs)." $$line";
202	0						$sawnext = 1;
203	0						last;
204							} elsif ($$line =~ /^\%\%/) {
205							# Saw %%, remember we're done
206	0						$sawnext = 1;
207	0						last;
208							} elsif ($$line =~ /^\\|/) {
209							# Saw vbar, stop this rhs
210	0						$$line = substr($$line, 1);
211	0						last;
212							} elsif ($$line =~ /^\;/) {
213							# Saw ; (means nothing any more)
214	0						$$line = substr($$line, 1);
215							} elsif ($$line =~ /^\{:/) {
216	0						$$line = substr($$line, 2);
217	0	0					my $code = $self->read_until($line, $input, ':\}')
218							or die "Parse error: EOF in {: code section starting at line $startline";
219	0						$code = substr($code, 0, -2);
220	0						push(@rhs, (bless \$code, $self->{language}.'CODE'));
221							} elsif ($$line =~ /^\{\?/) {
222	0	0					my $code = $self->read_until($line, $input, '\?\}')
223							or die "Parse error: EOF in {? code section starting at line $startline";
224	0						$code = substr($code, 0, -2);
225	0						push(@rhs, (bless \$code, $self->{language}.'CONDITION'));
226							} elsif ($$line =~ /^\{/) {
227	0						$$line = substr($$line, 1);
228	0	0					my $code = $self->read_code($line, $input)
229							or die "Parse error: EOF in { code section starting at line $startline";
230	0						$code = substr($code, 0, -1);
231	0						push(@rhs, (bless \$code, $self->{language}.'CODE'));
232							} elsif ($$line =~ /^\/\*/) {
233	0						$self->read_comment($line, $input);
234							} elsif ($$line =~ /^\%prec\s*/g) {
235	0						$$line = substr($$line, pos($$line));
236	0						$precedence_progenitor = $self->read_symbol($line, $input);
237							# BUG(?): accepts "lhs: a b %prec x c d"
238							# (equiv to "lhs: a b c d %prec x")
239							} else {
240	0						my $sym = $self->read_symbol($line, $input);
241	0						push(@rhs, $sym);
242							# print "Got symbol $sym, line=$$line\n";
243							}
244
245	0	0					defined $self->skip_ws($line, $input)
246							or last;
247							}
248
249	0						push(@rules, [ $lhs, \@rhs, $precedence_progenitor ]);
250
251	0	0					last if $sawnext;
252							}
253
254							# print "Ruleset for $lhs done $., next LINE=$$line\n";
255
256	0						return @rules;
257							}
258
259							sub read_symbol {
260	0			0	0		my ($self, $line, $input) = @_;
261
262	0						my $symbol;
263							# print "read_symbol($$line)\n";
264	0	0	0				($symbol) = ($$line =~ /^\s*(\w+)/)
265							or ($symbol) = ($$line =~ /^(\'.*?\')/)
266							or ($symbol) = ($$line =~ /^(\".*?\")/);
267
268	0	0					die "Parse error: Expected symbol, none found at line $."
269							if !defined $symbol;
270
271	0						$$line = substr($$line, length($symbol));
272	0						return $symbol;
273							}
274
275							# Assume there is a newline at the end of every line. Then this routine
276							# becomes the main source of input.
277							sub skip_ws {
278	0			0	0		my ($self, $line, $input) = @_;
279	0						my $ws = '';
280	0						my $comment = 0;
281	0						while (1) {
282	0	0	0				if (!$comment && $$line =~ /^\s\/\/g) {
		0	0
		0
		0
		0
283							# Beginning of C style comment: /* comment
284	0						$ws .= substr($$line, 0, pos($$line));
285	0						$$line = substr($$line, pos($$line));
286	0						$comment = 1;
287							} elsif ($comment && $$line =~ /\*\//g) {
288							# End of C style comment: */ stuff
289	0						$ws .= substr($$line, 0, pos($$line));
290	0						$$line = substr($$line, pos($$line));
291	0						$comment = 0;
292							} elsif ($comment) {
293							# Middle of C style comment
294	0						$ws .= $$line;
295	0						$$line = <$input>;
296	0	0					die "Parse error: EOF in comment" if !defined $$line;
297							} elsif ($$line =~ m,^\s*//,) {
298							# C++ style comments: // comment
299	0						$ws .= $$line;
300	0						$$line = <$input>;
301	0	0					die "Parse error: EOF in // comment at line $." if !defined $$line;
302							} elsif ($$line =~ /^\S/) {
303							# Good stuff
304	0						return $ws;
305							} else {
306							# Whitespace
307	0						$$line =~ s/^(\s+)//;
308	0						$ws .= $1;
309
310	0	0					if ($$line eq '') { $$line = <$input>; }
	0
311	0	0					return undef if !defined $$line;
312							}
313							}
314							}
315
316							sub read_comment {
317	0			0	0		my ($self, $line, $input) = @_;
318	0						my $comment;
319	0	0					if ($$line =~ /\*\//) {
320	0						($comment) = $$line =~ s/\/\(.?)\*\///;
321							} else {
322	0						$$line = substr($line, 2); # Chop off leading /*
323	0						while (1) {
324	0						$_ = <$input>;
325	0	0					die "Parse error: EOF in comment" if !defined $_;
326	0	0					if (/\*\//g) {
327	0						$comment = $$line . substr($_, 0, (pos) - 2);
328	0						$$line = substr($_, pos);
329							}
330							}
331							}
332
333	0						return $comment;
334							}
335
336							sub read_code {
337	0			0	0		my ($self, $line, $input) = @_;
338
339	0						my $code = '';
340	0						my $level = 1; # {} nesting level
341
342							# Handle a few special things:
343							# "double quoted strings"
344							# 'single quoted strings'
345							# { balanced {} exprs }
346
347	0						while ($level > 0) {
348	0						$self->skip_ws($line, $input);
349							# Scan to next ", ', {, }, or / (the last for comment starts)
350	0	0					if ($$line =~ /[\"\'\{\}\/]/g) {
351	0						$code .= substr($$line, 0, pos($$line));
352	0						my $char = substr($$line, pos($$line) - 1, 1);
353	0						$$line = substr($$line, pos($$line));
354
355	0	0					if ($char eq '"') {
		0
		0
		0
356	0	0					if ($$line =~ /([^\"\\]\|\\.)*\"/g) {
357	0						$code .= substr($$line, 0, pos($$line));
358	0						$$line = substr($$line, pos($$line));
359							} else {
360	0	0					defined (my $i = <$input>)
361							or die "Parse error: EOF in double-quoted string";
362	0						$$input .= $i;
363							}
364							} elsif ($char eq '\'') {
365	0	0					if ($$line =~ /([^\'\\]\|\\.)*\'/g) {
366	0						$code .= substr($$line, 0, pos($$line));
367	0						$$line = substr($$line, pos($$line));
368							} else {
369	0	0					defined(my $i = <$input>)
370							or die "Parse error: EOF in single-quoted string";
371	0						$$line .= $i;
372							}
373							} elsif ($char eq '{') {
374	0						$level++;
375							} elsif ($char eq '}') {
376	0						$level--;
377							}
378							} else {
379							# No interesting characters found
380	0						$code .= $$line;
381	0	0					defined ($$line = <$input>)
382							or die "Parse error: EOF in { code section before final close brace";
383							}
384							}
385
386	0						return $code;
387							}
388
389							######## UTILITY ###########
390
391							sub read_until {
392	0			0	0		my ($self, $line, $input, $pattern) = @_;
393	0						my $buf = $$line;
394	0						while ($buf !~ /$pattern/) {
395	0						$$line = <$input>;
396	0	0					return undef if !defined $$line;
397	0						$buf .= $$line;
398							}
399	0						$buf =~ /$pattern/g;
400	0						$$line = substr($buf, pos($buf));
401	0						return substr($buf, 0, pos($buf));
402							}
403
404							1;