line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Pegex::Bootstrap; |
2
|
1
|
|
|
1
|
|
1237
|
use Pegex::Base; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4
|
|
3
|
|
|
|
|
|
|
extends 'Pegex::Compiler'; |
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
354
|
use Pegex::Grammar::Atoms; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
26
|
|
6
|
1
|
|
|
1
|
|
352
|
use Pegex::Pegex::AST; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
39
|
|
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
7
|
use Carp qw(carp confess croak); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2704
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
11
|
|
|
|
|
|
|
# The grammar. A DSL data structure. Things with '=' are tokens. |
12
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
13
|
|
|
|
|
|
|
has pointer => 0; |
14
|
|
|
|
|
|
|
has groups => []; |
15
|
|
|
|
|
|
|
has tokens => []; |
16
|
|
|
|
|
|
|
has ast => {}; |
17
|
|
|
|
|
|
|
has stack => []; |
18
|
|
|
|
|
|
|
has tree => {}; |
19
|
|
|
|
|
|
|
has grammar => { |
20
|
|
|
|
|
|
|
'grammar' => [ |
21
|
|
|
|
|
|
|
'=pegex-start', |
22
|
|
|
|
|
|
|
'meta-section', |
23
|
|
|
|
|
|
|
'rule-section', |
24
|
|
|
|
|
|
|
'=pegex-end', |
25
|
|
|
|
|
|
|
], |
26
|
|
|
|
|
|
|
'meta-section' => 'meta-directive*', |
27
|
|
|
|
|
|
|
'meta-directive' => [ |
28
|
|
|
|
|
|
|
'=directive-start', |
29
|
|
|
|
|
|
|
'=directive-value', |
30
|
|
|
|
|
|
|
'=directive-end', |
31
|
|
|
|
|
|
|
], |
32
|
|
|
|
|
|
|
'rule-section' => 'rule-definition*', |
33
|
|
|
|
|
|
|
'rule-definition' => [ |
34
|
|
|
|
|
|
|
'=rule-start', |
35
|
|
|
|
|
|
|
'=rule-sep', |
36
|
|
|
|
|
|
|
'rule-group', |
37
|
|
|
|
|
|
|
'=rule-end', |
38
|
|
|
|
|
|
|
], |
39
|
|
|
|
|
|
|
'rule-group' => 'any-group', |
40
|
|
|
|
|
|
|
'any-group' => [ |
41
|
|
|
|
|
|
|
'=list-alt?', |
42
|
|
|
|
|
|
|
'all-group', |
43
|
|
|
|
|
|
|
[ |
44
|
|
|
|
|
|
|
'=list-alt', |
45
|
|
|
|
|
|
|
'all-group', |
46
|
|
|
|
|
|
|
'*', |
47
|
|
|
|
|
|
|
], |
48
|
|
|
|
|
|
|
], |
49
|
|
|
|
|
|
|
'all-group' => 'rule-part+', |
50
|
|
|
|
|
|
|
'rule-part' => [ |
51
|
|
|
|
|
|
|
'rule-item', |
52
|
|
|
|
|
|
|
[ |
53
|
|
|
|
|
|
|
'=list-sep', |
54
|
|
|
|
|
|
|
'rule-item', |
55
|
|
|
|
|
|
|
'?', |
56
|
|
|
|
|
|
|
], |
57
|
|
|
|
|
|
|
], |
58
|
|
|
|
|
|
|
'rule-item' => [ |
59
|
|
|
|
|
|
|
'|', |
60
|
|
|
|
|
|
|
'=rule-reference', |
61
|
|
|
|
|
|
|
'=quoted-regex', |
62
|
|
|
|
|
|
|
'regular-expression', |
63
|
|
|
|
|
|
|
'bracketed-group', |
64
|
|
|
|
|
|
|
'whitespace-token', |
65
|
|
|
|
|
|
|
'=error-message', |
66
|
|
|
|
|
|
|
], |
67
|
|
|
|
|
|
|
'regular-expression' => [ |
68
|
|
|
|
|
|
|
'=regex-start', |
69
|
|
|
|
|
|
|
'=!regex-end*', |
70
|
|
|
|
|
|
|
'=regex-end', |
71
|
|
|
|
|
|
|
], |
72
|
|
|
|
|
|
|
'bracketed-group' => [ |
73
|
|
|
|
|
|
|
'=group-start', |
74
|
|
|
|
|
|
|
'rule-group', |
75
|
|
|
|
|
|
|
'=group-end', |
76
|
|
|
|
|
|
|
], |
77
|
|
|
|
|
|
|
'whitespace-token' => [ |
78
|
|
|
|
|
|
|
'|', |
79
|
|
|
|
|
|
|
'=whitespace-maybe', |
80
|
|
|
|
|
|
|
'=whitespace-must', |
81
|
|
|
|
|
|
|
], |
82
|
|
|
|
|
|
|
}; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
85
|
|
|
|
|
|
|
# Parser logic: |
86
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
87
|
|
|
|
|
|
|
sub parse { |
88
|
0
|
|
|
0
|
1
|
|
my ($self, $grammar_text) = @_; |
89
|
|
|
|
|
|
|
|
90
|
0
|
|
|
|
|
|
$self->lex($grammar_text); |
91
|
|
|
|
|
|
|
# YYY $self->{tokens}; |
92
|
0
|
|
|
|
|
|
$self->{pointer} = 0; |
93
|
0
|
|
|
|
|
|
$self->{farthest} = 0; |
94
|
0
|
|
|
|
|
|
$self->{tree} = {}; |
95
|
|
|
|
|
|
|
|
96
|
0
|
0
|
|
|
|
|
$self->match_ref('grammar') || do { |
97
|
0
|
|
|
|
|
|
my $far = $self->{farthest}; |
98
|
0
|
|
|
|
|
|
my $tokens = $self->{tokens}; |
99
|
0
|
0
|
|
|
|
|
$far -= 4 if $far >= 4; |
100
|
0
|
|
|
|
|
|
WWW splice @$tokens, $far, 9; |
101
|
0
|
|
|
|
|
|
die "Bootstrap parse failed"; |
102
|
|
|
|
|
|
|
}; |
103
|
|
|
|
|
|
|
# XXX $self->{tree}; |
104
|
|
|
|
|
|
|
|
105
|
0
|
|
|
|
|
|
return $self; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub match_next { |
109
|
0
|
|
|
0
|
0
|
|
my ($self, $next) = @_; |
110
|
0
|
|
|
|
|
|
my $method; |
111
|
0
|
0
|
|
|
|
|
if (ref $next) { |
112
|
0
|
|
|
|
|
|
$next = [@$next]; |
113
|
0
|
0
|
|
|
|
|
if ($next->[0] eq '|') { |
114
|
0
|
|
|
|
|
|
shift @$next; |
115
|
0
|
|
|
|
|
|
$method = 'match_any'; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
else { |
118
|
0
|
|
|
|
|
|
$method = 'match_all'; |
119
|
|
|
|
|
|
|
} |
120
|
0
|
0
|
|
|
|
|
if ($next->[-1] =~ /^[\?\*\+]$/) { |
121
|
0
|
|
|
|
|
|
my $quant = pop @$next; |
122
|
0
|
|
|
|
|
|
return $self->match_times($quant, $method => $next); |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
else { |
125
|
0
|
|
|
|
|
|
return $self->$method($next); |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
else { |
129
|
0
|
0
|
|
|
|
|
$method = ($next =~ s/^=//) ? 'match_token' : 'match_ref'; |
130
|
0
|
0
|
|
|
|
|
if ($next =~ s/([\?\*\+])$//) { |
131
|
0
|
|
|
|
|
|
return $self->match_times($1, $method => $next); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
else { |
134
|
0
|
|
|
|
|
|
return $self->$method($next); |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub match_times { |
140
|
0
|
|
|
0
|
0
|
|
my ($self, $quantity, $method, @args) = @_; |
141
|
0
|
0
|
|
|
|
|
my ($min, $max) = |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
142
|
|
|
|
|
|
|
$quantity eq '' ? (1, 1) : |
143
|
|
|
|
|
|
|
$quantity eq '?' ? (0, 1) : |
144
|
|
|
|
|
|
|
$quantity eq '*' ? (0, 0) : |
145
|
|
|
|
|
|
|
$quantity eq '+' ? (1, 0) : die "Bad quantity '$quantity'"; |
146
|
0
|
|
0
|
|
|
|
my $stop = $max || 9999; |
147
|
0
|
|
|
|
|
|
my $count = 0; |
148
|
0
|
|
|
|
|
|
my $pointer = $self->{pointer}; |
149
|
0
|
|
0
|
|
|
|
while ($stop-- and $self->$method(@args)) { |
150
|
0
|
|
|
|
|
|
$count++; |
151
|
|
|
|
|
|
|
} |
152
|
0
|
0
|
0
|
|
|
|
return 1 if $count >= $min and (not $max or $count <= $max); |
|
|
|
0
|
|
|
|
|
153
|
0
|
|
|
|
|
|
$self->{pointer} = $pointer; |
154
|
0
|
0
|
|
|
|
|
$self->{farthest} = $pointer if $pointer > $self->{farthest}; |
155
|
0
|
|
|
|
|
|
return; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub match_any { |
159
|
0
|
|
|
0
|
0
|
|
my ($self, $any) = @_; |
160
|
0
|
|
|
|
|
|
my $pointer = $self->{pointer}; |
161
|
0
|
|
|
|
|
|
for (@$any) { |
162
|
0
|
0
|
|
|
|
|
if ($self->match_next($_)) { |
163
|
0
|
|
|
|
|
|
return 1; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
0
|
|
|
|
|
|
$self->{pointer} = $pointer; |
167
|
0
|
0
|
|
|
|
|
$self->{farthest} = $pointer if $pointer > $self->{farthest}; |
168
|
0
|
|
|
|
|
|
return; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub match_all { |
172
|
0
|
|
|
0
|
0
|
|
my ($self, $all) = @_; |
173
|
0
|
|
|
|
|
|
my $pointer = $self->{pointer}; |
174
|
0
|
|
|
|
|
|
for (@$all) { |
175
|
0
|
0
|
|
|
|
|
if (not $self->match_next($_)) { |
176
|
0
|
|
|
|
|
|
$self->{pointer} = $pointer; |
177
|
0
|
0
|
|
|
|
|
$self->{farthest} = $pointer if $pointer > $self->{farthest}; |
178
|
0
|
|
|
|
|
|
return; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
} |
181
|
0
|
|
|
|
|
|
return 1; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub match_ref { |
185
|
0
|
|
|
0
|
0
|
|
my ($self, $ref) = @_; |
186
|
0
|
0
|
|
|
|
|
my $rule = $self->{grammar}->{$ref} |
187
|
|
|
|
|
|
|
or Carp::confess "Not a rule reference: '$ref'"; |
188
|
0
|
|
|
|
|
|
$self->match_next($rule); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub match_token { |
192
|
0
|
|
|
0
|
0
|
|
my ($self, $token_want) = @_; |
193
|
0
|
0
|
|
|
|
|
my $not = ($token_want =~ s/^\!//) ? 1 : 0; |
194
|
0
|
0
|
|
|
|
|
return if $self->{pointer} >= @{$self->{tokens}}; |
|
0
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
|
my $token = $self->{tokens}[$self->{pointer}]; |
196
|
0
|
|
|
|
|
|
my $token_got = $token->[0]; |
197
|
0
|
0
|
0
|
|
|
|
if (($token_want eq $token_got) xor $not) { |
198
|
0
|
|
|
|
|
|
$token_got =~ s/-/_/g; |
199
|
0
|
|
|
|
|
|
my $method = "got_$token_got"; |
200
|
0
|
0
|
|
|
|
|
if ($self->can($method)) { |
201
|
|
|
|
|
|
|
# print "$method\n"; |
202
|
0
|
|
|
|
|
|
$self->$method($token); |
203
|
|
|
|
|
|
|
} |
204
|
0
|
|
|
|
|
|
$self->{pointer}++; |
205
|
0
|
|
|
|
|
|
return 1; |
206
|
|
|
|
|
|
|
} |
207
|
0
|
|
|
|
|
|
return; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
211
|
|
|
|
|
|
|
# Receiver/ast-generator methods: |
212
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
213
|
|
|
|
|
|
|
sub got_directive_start { |
214
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
215
|
0
|
|
|
|
|
|
$self->{directive_name} = $token->[1]; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub got_directive_value { |
219
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
220
|
0
|
|
|
|
|
|
my $value = $token->[1]; |
221
|
0
|
|
|
|
|
|
$value =~ s/\s+$//; |
222
|
0
|
|
|
|
|
|
my $name = $self->{directive_name}; |
223
|
0
|
0
|
|
|
|
|
if (my $old_value = $self->{tree}{"+$name"}) { |
224
|
0
|
0
|
|
|
|
|
if (not ref($old_value)) { |
225
|
0
|
|
|
|
|
|
$old_value = $self->{tree}{"+$name"} = [$old_value]; |
226
|
|
|
|
|
|
|
} |
227
|
0
|
|
|
|
|
|
push @$old_value, $value; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
else { |
230
|
0
|
|
|
|
|
|
$self->{tree}{"+$name"} = $value; |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub got_rule_start { |
235
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
236
|
0
|
|
|
|
|
|
$self->{stack} = []; |
237
|
0
|
|
|
|
|
|
my $rule_name = $token->[1]; |
238
|
0
|
|
|
|
|
|
$rule_name =~ s/-/_/g; |
239
|
0
|
|
|
|
|
|
$self->{rule_name} = $rule_name; |
240
|
0
|
|
0
|
|
|
|
$self->{tree}{'+toprule'} ||= $rule_name; |
241
|
0
|
|
|
|
|
|
$self->{groups} = [[0, ':']]; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub got_rule_end { |
245
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
246
|
0
|
|
|
|
|
|
$self->{tree}{$self->{rule_name}} = $self->group_ast; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
sub got_group_start { |
250
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
251
|
0
|
|
|
|
|
|
push @{$self->{groups}}, [scalar(@{$self->{stack}}), $token->[1]]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub got_group_end { |
255
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
256
|
0
|
|
|
|
|
|
my $rule = $self->group_ast; |
257
|
0
|
|
|
|
|
|
Pegex::Pegex::AST::set_quantity($rule, $token->[1]); |
258
|
0
|
|
|
|
|
|
push @{$self->{stack}}, $rule; |
|
0
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub got_list_alt { |
262
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
263
|
0
|
|
|
|
|
|
push @{$self->{stack}}, '|'; |
|
0
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub got_list_sep { |
267
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
268
|
0
|
|
|
|
|
|
push @{$self->{stack}}, $token->[1]; |
|
0
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub got_rule_reference { |
272
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
273
|
0
|
|
|
|
|
|
my $name = $token->[2]; |
274
|
0
|
|
|
|
|
|
$name =~ s/-/_/g; |
275
|
0
|
|
|
|
|
|
$name =~ s/^<(.*)>$/$1/; |
276
|
0
|
|
|
|
|
|
my $rule = { '.ref' => $name }; |
277
|
0
|
|
|
|
|
|
Pegex::Pegex::AST::set_modifier($rule, $token->[1]); |
278
|
0
|
|
|
|
|
|
Pegex::Pegex::AST::set_quantity($rule, $token->[3]); |
279
|
0
|
|
|
|
|
|
push @{$self->{stack}}, $rule; |
|
0
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub got_error_message { |
283
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
284
|
0
|
|
|
|
|
|
push @{$self->{stack}}, { '.err' => $token->[1] }; |
|
0
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
sub got_whitespace_maybe { |
288
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
289
|
0
|
|
|
|
|
|
$self->got_rule_reference(['whitespace-maybe', undef, '_', undef]); |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
sub got_whitespace_must { |
293
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
294
|
0
|
|
|
|
|
|
$self->got_rule_reference(['whitespace-maybe', undef, '__', undef]); |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub got_quoted_regex { |
298
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
299
|
0
|
|
|
|
|
|
my $regex = $token->[1]; |
300
|
0
|
|
|
|
|
|
$regex =~ s/([^\w\`\%\:\<\/\,\=\;])/\\$1/g; |
301
|
0
|
|
|
|
|
|
push @{$self->{stack}}, { '.rgx' => $regex }; |
|
0
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub got_regex_start { |
305
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
306
|
0
|
|
|
|
|
|
push @{$self->{groups}}, [scalar(@{$self->{stack}}), '/', $token->[1]]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
sub got_regex_end { |
310
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
311
|
0
|
|
|
|
|
|
my ($x, $y, $gmod) = @{$self->{groups}[-1]}; |
|
0
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
my $regex = join '', map { |
313
|
0
|
0
|
|
|
|
|
if (ref($_)) { |
314
|
0
|
|
|
|
|
|
my $part; |
315
|
0
|
0
|
|
|
|
|
if (defined($part = $_->{'.rgx'})) { |
|
|
0
|
|
|
|
|
|
316
|
0
|
|
|
|
|
|
$part; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
elsif (defined($part = $_->{'.ref'})) { |
319
|
0
|
|
|
|
|
|
"<$part>"; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
else { |
322
|
0
|
|
|
|
|
|
XXX $_; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
else { |
326
|
0
|
|
|
|
|
|
$_; |
327
|
|
|
|
|
|
|
} |
328
|
0
|
|
|
|
|
|
} splice(@{$self->{stack}}, (pop @{$self->{groups}})->[0]); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
329
|
0
|
|
|
|
|
|
$regex =~ s!\(([ism]?\:|\=|\!)!(?$1!g; |
330
|
0
|
|
|
|
|
|
my $rgx = {'.rgx' => $regex}; |
331
|
0
|
0
|
|
|
|
|
Pegex::Pegex::AST::set_modifier($rgx, $gmod) |
332
|
|
|
|
|
|
|
if $gmod; |
333
|
0
|
|
|
|
|
|
push @{$self->{stack}}, $rgx; |
|
0
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
sub got_regex_raw { |
337
|
0
|
|
|
0
|
0
|
|
my ($self, $token) = @_; |
338
|
0
|
|
|
|
|
|
push @{$self->{stack}}, $token->[1]; |
|
0
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
342
|
|
|
|
|
|
|
# Receiver helper methods: |
343
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
344
|
|
|
|
|
|
|
sub group_ast { |
345
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
346
|
0
|
|
|
|
|
|
my ($offset, $gmod) = @{pop @{$self->{groups}}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
347
|
0
|
|
0
|
|
|
|
$gmod ||= ''; |
348
|
0
|
|
|
|
|
|
my $rule = [splice(@{$self->{stack}}, $offset)]; |
|
0
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
|
350
|
0
|
|
|
|
|
|
for (my $i = 0; $i < @$rule-1; $i++) { |
351
|
0
|
0
|
|
|
|
|
if ($rule->[$i + 1] =~ /^%%?$/) { |
352
|
0
|
|
|
|
|
|
$rule->[$i] = Pegex::Pegex::AST::set_separator( |
353
|
|
|
|
|
|
|
$rule->[$i], |
354
|
|
|
|
|
|
|
splice @$rule, $i+1, 2 |
355
|
|
|
|
|
|
|
); |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
} |
358
|
0
|
|
|
|
|
|
my $started = 0; |
359
|
0
|
0
|
0
|
|
|
|
for ( |
360
|
|
|
|
|
|
|
my $i = (@$rule and $rule->[0] eq '|') ? 1 : 0; |
361
|
|
|
|
|
|
|
$i < @$rule-1; |
362
|
|
|
|
|
|
|
$i++ |
363
|
|
|
|
|
|
|
) { |
364
|
0
|
0
|
|
|
|
|
next if $rule->[$i] eq '|'; |
365
|
0
|
0
|
|
|
|
|
if ($rule->[$i+1] eq '|') { |
366
|
0
|
|
|
|
|
|
$i++; |
367
|
0
|
|
|
|
|
|
$started = 0; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
else { |
370
|
0
|
0
|
|
|
|
|
$rule->[$i] = {'.all' => [$rule->[$i]]} |
371
|
|
|
|
|
|
|
unless $started++; |
372
|
0
|
|
|
|
|
|
push @{$rule->[$i]{'.all'}}, splice @$rule, $i+1, 1; |
|
0
|
|
|
|
|
|
|
373
|
0
|
|
|
|
|
|
$i-- |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
} |
376
|
0
|
0
|
|
|
|
|
if (grep {$_ eq '|'} @$rule) { |
|
0
|
|
|
|
|
|
|
377
|
0
|
|
|
|
|
|
$rule = [{'.any' => [ grep {$_ ne '|'} @$rule ]}]; |
|
0
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
0
|
0
|
|
|
|
|
$rule = $rule->[0] if @$rule <= 1; |
381
|
0
|
0
|
|
|
|
|
Pegex::Pegex::AST::set_modifier($rule, $gmod) |
382
|
|
|
|
|
|
|
unless $gmod eq ':'; |
383
|
|
|
|
|
|
|
|
384
|
0
|
|
|
|
|
|
return $rule; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# DEBUG: wrap/trace parse methods: |
388
|
|
|
|
|
|
|
# for my $method (qw( |
389
|
|
|
|
|
|
|
# match_times match_next match_ref match_token match_any match_all |
390
|
|
|
|
|
|
|
# )) { |
391
|
|
|
|
|
|
|
# no strict 'refs'; |
392
|
|
|
|
|
|
|
# no warnings 'redefine'; |
393
|
|
|
|
|
|
|
# my $orig = \&$method; |
394
|
|
|
|
|
|
|
# *$method = sub { |
395
|
|
|
|
|
|
|
# my $self = shift; |
396
|
|
|
|
|
|
|
# my $args = join ', ', map { |
397
|
|
|
|
|
|
|
# ref($_) ? '[' . join(', ', @$_) . ']' : |
398
|
|
|
|
|
|
|
# length($_) ? $_ : "''" |
399
|
|
|
|
|
|
|
# } @_; |
400
|
|
|
|
|
|
|
# print "$method($args)\n"; |
401
|
|
|
|
|
|
|
# die if $main::x++ > 250; |
402
|
|
|
|
|
|
|
# $orig->($self, @_); |
403
|
|
|
|
|
|
|
# }; |
404
|
|
|
|
|
|
|
# } |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
407
|
|
|
|
|
|
|
# Lexer logic: |
408
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
409
|
|
|
|
|
|
|
my $ALPHA = 'A-Za-z'; |
410
|
|
|
|
|
|
|
my $DIGIT = '0-9'; |
411
|
|
|
|
|
|
|
my $DASH = '\-'; |
412
|
|
|
|
|
|
|
my $SEMI = '\;'; |
413
|
|
|
|
|
|
|
my $UNDER = '\_'; |
414
|
|
|
|
|
|
|
my $HASH = '\#'; |
415
|
|
|
|
|
|
|
my $EOL = '\r?\n'; |
416
|
|
|
|
|
|
|
my $WORD = "$DASH$UNDER$ALPHA$DIGIT"; |
417
|
|
|
|
|
|
|
my $WS = "(?:[\ \t]|$HASH.*$EOL)"; |
418
|
|
|
|
|
|
|
my $MOD = '[\!\=\-\+\.]'; |
419
|
|
|
|
|
|
|
my $GMOD = '[\.\-]'; |
420
|
|
|
|
|
|
|
my $QUANT = '(?:[\?\*\+]|\d+(?:\+|\-\d+)?)'; |
421
|
|
|
|
|
|
|
my $NAME = "$UNDER?[$UNDER$ALPHA](?:[$WORD]*[$ALPHA$DIGIT])?"; |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Repeated Rules: |
424
|
|
|
|
|
|
|
my $rem = [qr/\A(?:$WS+|$EOL+)/]; |
425
|
|
|
|
|
|
|
my $qr = [qr/\A\'((?:\\.|[^\'])*)\'/, 'quoted-regex']; |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# Lexer regex tree: |
428
|
|
|
|
|
|
|
has regexes => { |
429
|
|
|
|
|
|
|
pegex => [ |
430
|
|
|
|
|
|
|
[qr/\A%(grammar|version|extends|include)$WS+/, |
431
|
|
|
|
|
|
|
'directive-start', 'directive'], |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
[qr/\A($NAME)(?=$WS*\:)/, |
434
|
|
|
|
|
|
|
'rule-start', 'rule'], |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
$rem, |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
[qr/\A\z/, |
439
|
|
|
|
|
|
|
'pegex-end', 'end'], |
440
|
|
|
|
|
|
|
], |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
rule => [ |
443
|
|
|
|
|
|
|
[qr/\A(?:$SEMI$WS*$EOL?|\s*$EOL|)(?=$NAME$WS*\:|\z)/, |
444
|
|
|
|
|
|
|
'rule-end', 'end'], |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
[qr/\A\:/, |
447
|
|
|
|
|
|
|
'rule-sep'], |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
[qr/\A(?:\+|\~\~)(?=\s)/, |
450
|
|
|
|
|
|
|
'whitespace-must'], |
451
|
|
|
|
|
|
|
[qr/\A(?:\-|\~)(?=\s)/, |
452
|
|
|
|
|
|
|
'whitespace-maybe'], |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
$qr, |
455
|
|
|
|
|
|
|
[qr/\A($MOD)?($NAME|<$NAME>)($QUANT)?(?!$WS*$NAME\:)/, |
456
|
|
|
|
|
|
|
'rule-reference'], |
457
|
|
|
|
|
|
|
[qr/\A($GMOD)?\//, |
458
|
|
|
|
|
|
|
'regex-start', 'regex'], |
459
|
|
|
|
|
|
|
[qr/\A\`([^\`\n]*?)\`/, |
460
|
|
|
|
|
|
|
'error-message'], |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
[qr/\A($GMOD)?\(/, |
463
|
|
|
|
|
|
|
'group-start'], |
464
|
|
|
|
|
|
|
[qr/\A\)($QUANT)?/, |
465
|
|
|
|
|
|
|
'group-end'], |
466
|
|
|
|
|
|
|
[qr/\A\|/, |
467
|
|
|
|
|
|
|
'list-alt'], |
468
|
|
|
|
|
|
|
[qr/\A(\%\%?)/, |
469
|
|
|
|
|
|
|
'list-sep'], |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
$rem, |
472
|
|
|
|
|
|
|
], |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
directive => [ |
475
|
|
|
|
|
|
|
[qr/\A(\S.*)/, |
476
|
|
|
|
|
|
|
'directive-value'], |
477
|
|
|
|
|
|
|
[qr/\A$EOL/, |
478
|
|
|
|
|
|
|
'directive-end', 'end'] |
479
|
|
|
|
|
|
|
], |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
regex => [ |
482
|
|
|
|
|
|
|
[qr/\A$WS+(?:\+|\~\~|\-\-)/, |
483
|
|
|
|
|
|
|
'whitespace-must'], |
484
|
|
|
|
|
|
|
[qr/\A(?:\-|~)(?![-~])/, |
485
|
|
|
|
|
|
|
'whitespace-maybe'], |
486
|
|
|
|
|
|
|
$qr, |
487
|
|
|
|
|
|
|
[qr/\A$WS+()($NAME|<$NAME>)/, |
488
|
|
|
|
|
|
|
'rule-reference'], |
489
|
|
|
|
|
|
|
[qr/\A([^\s\'\/]+)/, |
490
|
|
|
|
|
|
|
'regex-raw'], |
491
|
|
|
|
|
|
|
[qr/\A$WS+/], |
492
|
|
|
|
|
|
|
[qr/\A$EOL+/], |
493
|
|
|
|
|
|
|
[qr/\A\//, |
494
|
|
|
|
|
|
|
'regex-end', 'end'], |
495
|
|
|
|
|
|
|
$rem, |
496
|
|
|
|
|
|
|
], |
497
|
|
|
|
|
|
|
}; |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
sub lex { |
500
|
0
|
|
|
0
|
0
|
|
my ($self, $grammar) = @_; |
501
|
|
|
|
|
|
|
|
502
|
0
|
|
|
|
|
|
my $tokens = $self->{tokens} = [['pegex-start']]; |
503
|
0
|
|
|
|
|
|
my $stack = ['pegex']; |
504
|
0
|
|
|
|
|
|
my $pos = 0; |
505
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
|
OUTER: while (1) { |
507
|
0
|
|
|
|
|
|
my $state = $stack->[-1]; |
508
|
0
|
0
|
|
|
|
|
my $set = $self->{regexes}->{$state} or die "Invalid state '$state'"; |
509
|
0
|
|
|
|
|
|
for my $entry (@$set) { |
510
|
0
|
|
|
|
|
|
my ($regex, $name, $scope) = @$entry; |
511
|
0
|
0
|
|
|
|
|
if (substr($grammar, $pos) =~ $regex) { |
512
|
0
|
|
|
|
|
|
$pos += length($&); |
513
|
0
|
0
|
|
|
|
|
if ($name) { |
514
|
1
|
|
|
1
|
|
7
|
no strict 'refs'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
320
|
|
515
|
0
|
|
|
|
|
|
my @captures = map $$_, 1..$#+; |
516
|
|
|
|
|
|
|
pop @captures |
517
|
0
|
|
0
|
|
|
|
while @captures and not defined $captures[-1]; |
518
|
0
|
|
|
|
|
|
push @$tokens, [$name, @captures]; |
519
|
0
|
0
|
|
|
|
|
if ($scope) { |
520
|
0
|
0
|
|
|
|
|
if ($scope eq 'end') { |
521
|
0
|
|
|
|
|
|
pop @$stack; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
else { |
524
|
0
|
|
|
|
|
|
push @$stack, $scope; |
525
|
|
|
|
|
|
|
# Hack to support /+ …/ |
526
|
0
|
0
|
|
|
|
|
if ($scope eq 'regex') { |
527
|
0
|
0
|
|
|
|
|
if (substr($grammar, $pos) =~ /\A\+(?=[\s\/])/) { |
528
|
0
|
|
|
|
|
|
$pos += length($&); |
529
|
0
|
|
|
|
|
|
push @$tokens, ['whitespace-must']; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
} |
535
|
0
|
0
|
|
|
|
|
last OUTER unless @$stack; |
536
|
0
|
|
|
|
|
|
next OUTER; |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
} |
539
|
0
|
|
|
|
|
|
my $text = substr($grammar, $pos, 50); |
540
|
0
|
|
|
|
|
|
$text =~ s/\n/\\n/g; |
541
|
0
|
|
|
|
|
|
WWW $tokens; |
542
|
0
|
|
|
|
|
|
die <<"..."; |
543
|
|
|
|
|
|
|
Failed to lex $state here-->$text |
544
|
|
|
|
|
|
|
... |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
1; |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
# vim: set lisp: |