| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Parse::Earley; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
#Parse::Earley |
|
4
|
|
|
|
|
|
|
#By Luke Palmer |
|
5
|
|
|
|
|
|
|
#Copyright (C) 2002, Luke Palmer. All rights reserved. |
|
6
|
|
|
|
|
|
|
#This module is free software. It may be used, redistributed, and/or modified |
|
7
|
|
|
|
|
|
|
#under the terms of the Perl Artistic Licence: |
|
8
|
|
|
|
|
|
|
# http://www.perl.com/perl/misc/Artistic.html |
|
9
|
|
|
|
|
|
|
|
|
10
|
1
|
|
|
|
|
111
|
use Text::Balanced qw( extract_quotelike extract_codeblock |
|
11
|
1
|
|
|
1
|
|
17579
|
extract_bracketed extract_multiple ); |
|
|
1
|
|
|
|
|
22902
|
|
|
12
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
49
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
4
|
use strict; |
|
|
1
|
|
|
|
|
7
|
|
|
|
1
|
|
|
|
|
3253
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.15'; |
|
17
|
|
|
|
|
|
|
our $DEBUG; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# Constructor.  Returns a fresh parser object blessed into the class given
# as the first argument.
#
# Fields of the object:
#   rules   - rule name => list of productions (populated by grammar())
#   sets    - input position => list of states (sparse, keyed by pos)
#   set     - the state list advance() is currently working through
#   ncset   - the state list being collected for the next advance()
#   skip    - pattern skipped in front of terminals (optional whitespace)
#   no_code - when true, grammar() refuses embedded code blocks
#
# The ($) prototype is kept for interface compatibility; Perl ignores
# prototypes on method calls.
sub new($)
{
    my $class = shift;

    my %fields = (
        rules   => { },
        sets    => { },
        set     => [ ],
        ncset   => [ ],
        skip    => qr/\s*/,
        no_code => undef,
    );

    return bless \%fields, $class;
}
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Files every state of @$set in $self->{sets}, under the key given by the
# state's own {pos}.  A state is added only if that very hash reference is
# not already present in its bucket (identity comparison, not a field-wise
# one).
#
# Parameters:
#   $set - array reference of state hashes (keys: rule lhs dot pos ref)
#
# Returns the number of states that were newly added.
#
# The (\$$) prototype is kept for interface compatibility; Perl ignores
# prototypes on method calls.
sub pushset(\$$)
{
    my ($self, $set) = @_;
    my $change = 0;

    for my $state (@$set) {
        # Look the bucket up once, creating it on first use.
        my $bucket = $self->{sets}{ $state->{pos} } ||= [ ];

        # Numeric == on references compares addresses.
        next if grep { $_ == $state } @$bucket;

        push @$bucket, $state;
        $change++;
    }

    return $change;
}
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# XXX: This is a simple processor: make it better |
|
53
|
|
|
|
|
|
|
# Parses the grammar text $g and appends its productions to $self->{rules}
# (rule name => list of productions, each production a list of symbol
# hashes with the keys line, match, type and optionally noskip and code).
#
# Recognised tokens:
#   name:          starts a new rule
#   |              starts another alternative of the current rule
#   name           nonterminal
#   'x' "x" q(x)   literal (the token is evaluated as a Perl expression)
#   /re/  m?re?    regex
#   <noskip>       directive; flags the next token read
#   { ... }        code block, attached to the preceding symbol
#   # ...          comment
#
# Returns 1 on success; croaks with all collected error messages otherwise.
#
# Changes from the previous implementation:
#   * nothing is stored under an undefined rule name any more (the first
#     "name:" and an empty grammar used to create a bogus rules{''} entry);
#   * literals are evaluated in scalar context and a failed evaluation is
#     reported, instead of silently producing a malformed symbol hash;
#   * line numbers start at 1 instead of undef;
#   * <noskip > with inner trailing whitespace is recognised.
#
# NOTE(review): literal tokens go through string eval even when no_code is
# set, so a double-quoted literal can still run code via interpolation.
# Confirm whether no_code is meant to be a security boundary.
sub grammar(\$$)
{
    my ($self, $g) = @_;

    # The code-ref extractors below read the global $_, and
    # extract_multiple() is given that same $_, so both operate on one
    # variable.  Keep this arrangement exactly as it is.
    local $_ = $g;
    my @toks = extract_multiple($_, [
        qr/[a-zA-Z_]\w*\s*:/,                        # LHS
        qr/[a-zA-Z_]\w*/,                            # Nonterminal
        sub { scalar extract_quotelike($_); },
        sub { scalar extract_bracketed($_, '<>') },
        sub { scalar extract_codeblock($_, '{}') },
        qr/\|/,
        qr/#.*/m,
    ]);

    my @rulebuf;            # symbols of the production being collected
    my $curule;             # name of the rule being collected
    my $errors = '';
    my $noskip_f;
    my $lineno = 1;         # approximate: counts newlines seen in tokens

    # Store the collected production under the current rule, if there is one.
    my $flush = sub {
        push @{ $self->{rules}{$curule} }, [ @rulebuf ] if defined $curule;
        @rulebuf = ();
    };

    for (@toks)
    {
        $lineno += tr/\n//;

        my %cp = (line => $lineno);
        if ($noskip_f) {
            $noskip_f = 0;
            $cp{noskip} = 1;
        }

        s/^\s+//;
        s/\s+$//;

        if (/^(\w+)\s*:$/) {                         # LHS
            my $name = $1;
            $flush->();
            $curule = $name;
        }
        elsif (/^\|$/) {
            $flush->();
        }
        elsif (/^(\w+)$/) {                          # Nonterminal
            push @rulebuf, { %cp,
                             match => $1,
                             type  => 'nonterminal' };
        }
        elsif (/^['"]/ || /^q/) {                    # Terminal
            # Scalar assignment: a failed eval yields undef here rather
            # than an empty list inside the hash constructor.
            my $text = eval "$_";
            if ($@) {
                $errors .= "Bad literal $_ at line $lineno: $@\n";
            }
            else {
                push @rulebuf, { %cp,
                                 match => $text,
                                 type  => 'literal' };
            }
        }
        elsif (/^\/(.*)\/$/ || /^m.(.*).$/) {
            push @rulebuf, { %cp,
                             match => qr/$1/,
                             type  => 'regex' };
        }
        elsif (/^<\s*(.*?)\s*>$/) {
            my $dir = $1;
            if ($dir eq 'noskip') {
                $noskip_f = 1;
            }
            else {
                $errors .= "Unrecognized directive: <$dir> at line $lineno\n";
            }
        }
        elsif (/^\{/) {
            if ($self->{no_code}) {
                $errors .= "Code not allowed\n";
            }
            elsif (@rulebuf) {
                $rulebuf[-1]{code} = eval "sub $_";
                $errors .= "$@\n" if $@;
            }
            else {
                $errors .= "Condition does not follow anything at line $lineno\n";
            }
        }
        elsif (/^#/ || /^\s*$/) {
            # comment or blank: nothing to record
        }
        else {
            $errors .= "Unrecognized pattern '$_' at line $lineno\n";
        }
    }

    $flush->();

    croak $errors if $errors;
    return 1;
}
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Resets the parser state and seeds it for a parse starting at $rule.
#
# Clears $self->{sets}, {set} and {ncset}, then creates one state per
# alternative of $rule (dot 0, pos 0, ref 0), appends those states to
# {ncset} and files them through pushset().
#
# Returns whatever pushset() returns (the number of states it added).
#
# An unknown $rule yields an empty seed set.  The rule table is read
# without creating an entry for it: looping directly over
# @{$self->{rules}{$rule}} would autovivify an empty rule, after which
# predict() could no longer tell that the nonterminal is undefined.
sub start(\$$)
{
    my ($self, $rule) = @_;

    $self->{sets}  = { };
    $self->{set}   = [ ];
    $self->{ncset} = [ ];

    my $alternatives = $self->{rules}{$rule} || [ ];

    my @newset;
    for my $production (@$alternatives) {
        push @newset, { rule => $production,
                        lhs  => $rule,
                        dot  => 0,
                        pos  => 0,
                        ref  => 0 };
    }

    push @{ $self->{ncset} }, @newset;
    return $self->pushset(\@newset);
}
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# Performs one step of the parse over the input string $str.
#
# The states collected in {ncset} are filed through pushset() and become the
# current set {set}; {ncset} is emptied.  Every state of the current set is
# then dispatched on the symbol after its dot:
#   no symbol left  -> complete()
#   nonterminal     -> predict()
#   anything else   -> scan()
# predict() and complete() append to the current set while it is being
# processed, so the set is walked by index: perlsyn warns against adding
# elements to an array that a foreach loop is iterating over.
#
# When $DEBUG is true, both sets are printed afterwards.
# Returns nothing.
sub advance(\$$)
{
    my ($self, $str) = @_;

    $self->pushset($self->{ncset});
    $self->{set}   = $self->{ncset};
    $self->{ncset} = [ ];

    my $set = $self->{set};
    for (my $i = 0; $i < @$set; $i++) {
        my $state = $set->[$i];
        my $p     = $state->{rule}[ $state->{dot} ];

        if (!$p) {
            $self->complete($state);
        }
        elsif ($p->{type} eq 'nonterminal') {
            $self->predict($state);
        }
        else {
            $self->scan($state, $str);
        }
    }

    $self->_dump_sets($str) if $DEBUG;
    return;
}

# Debug helper for advance(): prints the current and the next state set to
# the selected output handle, one state per line, with a window of the input
# around the state's position and the rule with a '*' at the dot.
sub _dump_sets
{
    my ($self, $str) = @_;
    my $displen = 7;
    my $sp = ' ' x 4;

    for my $which (qw(set ncset)) {
        my $title = ($which =~ /nc/) ? 'advance(): NEXT SET'
                                     : 'advance(): CURRENT SET';
        print $title;
        print "\n";

        for my $state (@{ $self->{$which} }) {
            print $sp;

            # Input just before and just after the state's position.
            my @p;
            if ($state->{pos} < $displen) {
                $p[0] = substr($str, 0, $state->{pos});
            }
            else {
                $p[0] = substr($str, $state->{pos}-$displen, $displen);
            }
            $p[1] = substr($str, $state->{pos}, 5);
            s/(.)/ord($1)<32 || ord($1)>127 ? '.' : $1/seg for @p;

            printf "\%${displen}s * \%-${displen}s\%s", $p[0], $p[1], $sp;
            print "($state->{pos}) $state->{lhs}: ";

            # Runs one step past the rule's end so that a dot at the very
            # end is printed as well.
            for (my $i = 0; $i < @{$state->{rule}} || $i <= $state->{dot}; $i++) {
                print "* " if $state->{dot} == $i;
                next unless exists $state->{rule}[$i];

                my $t = $state->{rule}[$i]{type};
                my $m = $state->{rule}[$i]{match};
                if ($t eq 'literal') {
                    print "'$m' ";
                }
                elsif ($t eq 'regex') {
                    $m =~ s/^.*?://;    # Get rid of the qr// stuff
                    $m =~ s/\)$//;
                    print "/$m/ ";
                }
                else {
                    print "$m ";
                }
            }
            print "($state->{ref})\n";
        }
    }
    return;
}
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# Tests whether $rule matches the entire input.  Partial (sub-)matches are
# seldom useful and would only make the parse graph needlessly large, so
# they are not reported.
#
# Text matching $self->{skip} at the end of $str is dropped first.  The
# states filed under the length of the remaining string are then filtered
# for those that belong to $rule, have their dot at the end of the rule and
# have ref 0.
#
# Returns those states (their number in scalar context), or nothing when no
# state set exists for that position.
sub matches(\$$$)
{
    my ($self, $str, $rule) = @_;

    $str =~ s/$self->{skip}$//;

    my $final = $self->{sets}{ length($str) };
    return unless $final;

    my @complete;
    for my $state (@$final) {
        next unless $state->{lhs} eq $rule;
        next unless $state->{dot} == @{ $state->{rule} };
        next unless $state->{ref} == 0;
        push @complete, $state;
    }
    return @complete;
}
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Variant of matches() that only answers once nothing is pending: while
# $self->{ncset} still holds states it returns nothing; otherwise it returns
# whatever matches($str, $rule) returns.
sub matches_all(\$$$)
{
    my ($self, $str, $rule) = @_;

    return if @{ $self->{ncset} };
    return $self->matches($str, $rule);
}
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# Reports whether the parse has failed.
#
# Returns 0 when matches($str, $rule) finds a match, and also 0 while the
# current set $self->{set} still contains states; returns 1 only when there
# is no match and the current set is empty.
sub fails(\$$$)
{
    my ($self, $str, $rule) = @_;

    my $matched = $self->matches($str, $rule);
    return 0 if $matched;

    my $live_states = @{ $self->{set} };
    return $live_states ? 0 : 1;
}
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# Predictor step for $state, whose dot stands in front of a nonterminal.
#
# For every alternative of that nonterminal a new state is created at
# $state's position (dot 0, pos and ref both set to that position), unless
# the current set already contains a state with the same rule, lhs, dot, pos
# and ref.  The new states are appended to the current set and filed through
# pushset(), whose result is returned.
#
# Croaks when the nonterminal has no definition in $self->{rules}.
sub predict(\$$)
{
    my ($self, $state) = @_;

    my $cset   = $self->{set};
    my $symbol = $state->{rule}[ $state->{dot} ];
    my $name   = $symbol->{match};

    my $alternatives = $self->{rules}{$name};
    croak "No definition for nonterminal '$name'\n" unless $alternatives;

    my $here = $state->{pos};
    my @newset;

    ALTERNATIVE:
    for my $production (@$alternatives) {
        # Skip alternatives that were already predicted at this position.
        for my $seen (@$cset) {
            next ALTERNATIVE
                if $seen->{rule} == $production
                && $seen->{lhs}  eq $name
                && $seen->{dot}  == 0
                && $seen->{pos}  == $here
                && $seen->{ref}  == $here;
        }
        push @newset, { rule => $production,
                        lhs  => $name,
                        dot  => 0,
                        pos  => $here,
                        ref  => $here };
    }

    push @$cset, @newset;
    return $self->pushset(\@newset);
}
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# Scanner step for $state, whose dot stands in front of a terminal.
#
# Unless the terminal is flagged noskip, text matching $self->{skip} is
# skipped first.  The terminal is then matched against $str at that point:
#   literal - the next length(match) characters must equal the literal;
#   regex   - the pattern must match at exactly that position.
# An attached code block is called with $_ set to the token and must return
# a true value; if it dies, this croaks with the grammar line number.
#
# On success a state with the dot moved one symbol on is appended to
# $self->{ncset}, unless a state with the same rule, lhs, dot, pos and ref
# is already there.
#
# Returns nothing.
#
# Fix: when the skip pattern does not match at the current position (for
# instance a custom skip of qr/\s+/), pos() is reset to undef by the failed
# match.  The old code took that undef as the new position, so the terminal
# was matched at offset 0.  The position is now left unchanged in that case.
#
# NOTE(review): as before, a matched regex token is also filed in
# $self->{sets} right away, while a literal token is not.  The asymmetry is
# preserved; confirm whether it is intentional.
sub scan(\$$$)
{
    my ($self, $state, $str) = @_;

    my $p      = $state->{rule}[ $state->{dot} ];
    my $skipos = $state->{pos};

    unless ($p->{noskip}) {
        pos($str) = $skipos;
        $skipos = pos($str) if $str =~ /\G$self->{skip}/g;   # Terminal separator
    }

    my ($tok, $end, $file_now);

    if ($p->{type} eq 'literal') {
        $tok = substr($str, $skipos, length $p->{match});

        # The condition runs before the text comparison, as it always did.
        my $res = 1;
        if ($p->{code}) {
            local $_ = $tok;
            $res = eval { $p->{code}() };
            croak "$@ near line $p->{line} of grammar\n" if $@;
        }
        return unless $res and $tok eq $p->{match};

        $end = $skipos + length $p->{match};
    }
    elsif ($p->{type} eq 'regex') {
        pos($str) = $skipos;
        return unless $str =~ /\G($p->{match})/g;
        $tok = $1;

        if ($p->{code}) {
            local $_ = $tok;
            my $res = eval { $p->{code}() };
            croak "$@ near line $p->{line} of grammar\n" if $@;
            return unless $res;
        }

        $end      = pos($str);
        $file_now = 1;
    }
    else {
        return;
    }

    my $push = {
        rule => $state->{rule},
        lhs  => $state->{lhs},
        dot  => $state->{dot}+1,
        pos  => $end,
        ref  => $state->{ref},
        tok  => $tok,
        left => [ $state ] };

    my $queued = grep {
        $_->{rule} == $push->{rule} &&
        $_->{lhs}  eq $push->{lhs}  &&
        $_->{dot}  == $push->{dot}  &&
        $_->{pos}  == $push->{pos}  &&
        $_->{ref}  == $push->{ref} } @{ $self->{ncset} };
    return if $queued;

    push @{ $self->{ncset} }, $push;
    push @{ $self->{sets}{ $push->{pos} } }, $push if $file_now;
    return;
}
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# Completer step for $state, whose dot is at the end of its rule.
#
# Looks in the state set of the position where $state began
# ($self->{sets}{ $state->{ref} }) for "parent" states whose dot stands in
# front of the nonterminal that $state has just recognised.  For each parent:
#   * if the current set has no state equal to the parent with its dot moved
#     one symbol on (same rule, lhs, pos and ref), such a state is created
#     with down => [ $state ] and left => [ $parent ];
#   * otherwise $state and the parent are appended to the down / left lists
#     of each existing state, unless both are already recorded there.
# A code block attached to the parent's symbol is called with $_ set to
# $state and must return a true value for either action to happen; if it
# dies, this croaks with the grammar line number.
#
# New states are appended to the current set and filed through pushset(),
# whose result is returned.
#
# Fix: parents were selected by comparing only the symbol's match text with
# the completed rule's name, so a literal terminal spelled like the
# nonterminal (e.g. 'X' next to a rule X) was advanced too.  The symbol must
# now be a nonterminal.
sub complete(\$$)
{
    my ($self, $state) = @_;
    my $cset = $self->{set};

    my @parents = grep {
        my $next = $_->{rule}[ $_->{dot} ];
        $next
            && $next->{type}  eq 'nonterminal'
            && $next->{match} eq $state->{lhs}
    } @{ $self->{sets}{ $state->{ref} } };

    # Runs the condition attached to a symbol, if any, against $state.
    my $condition_holds = sub {
        my ($symbol) = @_;
        return 1 unless $symbol->{code};
        local $_ = $state;
        my $res = eval { $symbol->{code}() };
        croak "$@ near line $symbol->{line} of grammar\n" if $@;
        return $res;
    };

    my @newset;
    for my $m (@parents) {
        my $symbol = $m->{rule}[ $m->{dot} ];

        my @g = grep { $_->{rule} == $m->{rule} &&
                       $_->{lhs}  eq $m->{lhs} &&
                       $_->{dot}  == $m->{dot}+1 &&
                       $_->{pos}  == $state->{pos} &&
                       $_->{ref}  == $m->{ref} } @$cset;

        if (@g) {
            # The advanced state exists already: record this derivation as
            # an additional way of reaching it.
            for my $existing (@g) {
                my $known = (grep { $_ == $state } @{ $existing->{down} || [ ] })
                         && (grep { $_ == $m }     @{ $existing->{left} || [ ] });
                next if $known;
                next unless $condition_holds->($symbol);

                push @{ $existing->{down} }, $state;
                push @{ $existing->{left} }, $m;
            }
            next;
        }

        next unless $condition_holds->($symbol);

        push @newset, { rule => $m->{rule},
                        lhs  => $m->{lhs},
                        dot  => $m->{dot}+1,
                        pos  => $state->{pos},
                        ref  => $m->{ref},
                        down => [ $state ],
                        left => [ $m ],
                      };
    }

    push @$cset, @newset;
    return $self->pushset(\@newset);
}
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
1; |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
__END__ |