| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# You may distribute under the terms of either the GNU General Public License |
|
2
|
|
|
|
|
|
|
# or the Artistic License (the same terms as Perl itself) |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# (C) Paul Evans, 2010-2022 -- leonerd@leonerd.org.uk |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package Parser::MGC 0.20; |
|
7
|
|
|
|
|
|
|
|
|
8
|
31
|
|
|
31
|
|
1418337
|
use v5.14; |
|
|
31
|
|
|
|
|
1556
|
|
|
9
|
31
|
|
|
31
|
|
141
|
use warnings; |
|
|
31
|
|
|
|
|
44
|
|
|
|
31
|
|
|
|
|
741
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
31
|
|
|
31
|
|
127
|
use Carp; |
|
|
31
|
|
|
|
|
48
|
|
|
|
31
|
|
|
|
|
1633
|
|
|
12
|
31
|
|
|
31
|
|
12849
|
use Feature::Compat::Try; |
|
|
31
|
|
|
|
|
8602
|
|
|
|
31
|
|
|
|
|
106
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
31
|
|
|
31
|
|
71230
|
use Scalar::Util qw( blessed ); |
|
|
31
|
|
|
|
|
61
|
|
|
|
31
|
|
|
|
|
3009
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
C<Parser::MGC> - build simple recursive-descent parsers |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package My::Grammar::Parser; |
|
23
|
|
|
|
|
|
|
use base qw( Parser::MGC ); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub parse |
|
26
|
|
|
|
|
|
|
{ |
|
27
|
|
|
|
|
|
|
my $self = shift; |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$self->sequence_of( sub { |
|
30
|
|
|
|
|
|
|
$self->any_of( |
|
31
|
|
|
|
|
|
|
sub { $self->token_int }, |
|
32
|
|
|
|
|
|
|
sub { $self->token_string }, |
|
33
|
|
|
|
|
|
|
sub { \$self->token_ident }, |
|
34
|
|
|
|
|
|
|
sub { $self->scope_of( "(", \&parse, ")" ) } |
|
35
|
|
|
|
|
|
|
); |
|
36
|
|
|
|
|
|
|
} ); |
|
37
|
|
|
|
|
|
|
} |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $parser = My::Grammar::Parser->new; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $tree = $parser->from_file( $ARGV[0] ); |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
... |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
This base class provides a low-level framework for building recursive-descent |
|
48
|
|
|
|
|
|
|
parsers that consume a given input string from left to right, returning a |
|
49
|
|
|
|
|
|
|
parse structure. It takes its name from the C<m/.../gc> regexps used to implement |
|
50
|
|
|
|
|
|
|
the token parsing behaviour. |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
It provides a number of token-parsing methods, which each extract a |
|
53
|
|
|
|
|
|
|
grammatical token from the string. It also provides wrapping methods that can |
|
54
|
|
|
|
|
|
|
be used to build up a possibly-recursive grammar structure, by applying a |
|
55
|
|
|
|
|
|
|
structure around other parts of parsing code. |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head2 Backtracking |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Each method, both token and structural, atomically either consumes a prefix of |
|
60
|
|
|
|
|
|
|
the string and returns its result, or fails and consumes nothing. This makes |
|
61
|
|
|
|
|
|
|
it simple to implement grammars that require backtracking. |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Several structure-forming methods have some form of "optional" behaviour; they |
|
64
|
|
|
|
|
|
|
can optionally consume some amount of input or take some particular choice, |
|
65
|
|
|
|
|
|
|
but if the code invoked inside that subsequently fails, the structure can |
|
66
|
|
|
|
|
|
|
backtrack and take some different behaviour. This is usually what is required |
|
67
|
|
|
|
|
|
|
when testing whether the structure of the input string matches some part of |
|
68
|
|
|
|
|
|
|
the grammar that is optional, or has multiple choices. |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
However, once the choice of grammar has been made, it is often useful to be |
|
71
|
|
|
|
|
|
|
able to fix on that one choice, thus making subsequent failures propagate up |
|
72
|
|
|
|
|
|
|
rather than taking that alternative behaviour. Control of this backtracking |
|
73
|
|
|
|
|
|
|
is given by the C<commit> method; and careful use of this method is one of the |
|
74
|
|
|
|
|
|
|
key advantages that C<Parser::MGC> has over more simple parsing using single |
|
75
|
|
|
|
|
|
|
regexps alone. |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=cut |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=cut |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 new |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
$parser = Parser::MGC->new( %args ) |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Returns a new instance of a C<Parser::MGC> object. This must be called on a |
|
88
|
|
|
|
|
|
|
subclass that provides a method of the name provided as C<toplevel>, by default |
|
89
|
|
|
|
|
|
|
called C<parse>. |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Takes the following named arguments |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=over 8 |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item toplevel => STRING |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Name of the toplevel method to use to start the parse from. If not supplied, |
|
98
|
|
|
|
|
|
|
will try to use a method called C<parse>. |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item patterns => HASH |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Keys in this hash should map to quoted regexp (C<qr//>) references, to |
|
103
|
|
|
|
|
|
|
override the default patterns used to match tokens. See C<PATTERNS> below. |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item accept_0o_oct => BOOL |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
If true, the C<token_int> method will also accept integers with a C<0o> prefix |
|
108
|
|
|
|
|
|
|
as octal. |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=back |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 PATTERNS |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
The following pattern names are recognised. They may be passed to the |
|
117
|
|
|
|
|
|
|
constructor in the C<patterns> hash, or provided as a class method under the |
|
118
|
|
|
|
|
|
|
name C<pattern_I<name>>. |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=over 4 |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item * ws |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Pattern used to skip whitespace between tokens. Defaults to C</[\s\n\t]+/> |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item * comment |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Pattern used to skip comments between tokens. Undefined by default. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item * int |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Pattern used to parse an integer by C<token_int>. Defaults to |
|
133
|
|
|
|
|
|
|
C</-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/>. If C<accept_0o_oct> is given, then |
|
134
|
|
|
|
|
|
|
this will be expanded to match C</0o[0-7]+/> as well. |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item * float |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Pattern used to parse a floating-point number by C<token_float>. Defaults to |
|
139
|
|
|
|
|
|
|
C</-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i>. |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item * ident |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Pattern used to parse an identifier by C<token_ident>. Defaults to |
|
144
|
|
|
|
|
|
|
C</[[:alpha:]_]\w*/> |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=item * string_delim |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Pattern used to delimit a string by C<token_string>. Defaults to C</["']/>. |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=back |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Names of the token patterns the constructor resolves, each either from the
# 'patterns' constructor argument or from the matching pattern_NAME method.
my @patterns = qw( ws comment int float ident string_delim );
|
162
|
|
|
|
|
|
|
|
|
163
|
31
|
|
|
31
|
|
177
|
# Default token patterns, exposed as constant methods named pattern_NAME.
# pattern_comment is undef: no comment syntax is skipped by default.
use constant {
   pattern_ws           => qr/[\s\n\t]+/,
   pattern_comment      => undef,
   pattern_int          => qr/-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/,
   pattern_float        => qr/-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i,
   pattern_ident        => qr/[[:alpha:]_]\w*/,
   pattern_string_delim => qr/["']/,
};
|
|
31
|
|
|
|
|
51
|
|
|
|
31
|
|
|
|
|
113440
|
|
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# Constructor. Recognised named arguments:
#   toplevel      - name of the method the parse starts from (default "parse")
#   patterns      - hash of values overriding the pattern_NAME defaults
#   accept_0o_oct - if true, the int pattern additionally matches 0o[0-7]+
# Croaks unless the invocant class can perform the toplevel method.
sub new
{
   my ( $class, %args ) = @_;

   my $toplevel = $args{toplevel} || "parse";

   croak "Expected to be a subclass that can ->$toplevel"
      unless $class->can( $toplevel );

   my $self = bless {
      toplevel    => $toplevel,
      patterns    => {},
      scope_level => 0,
   }, $class;

   # A false (including absent) override falls back to the class's
   # pattern_NAME method, so subclasses may override defaults by method
   foreach my $name ( @patterns ) {
      my $default_method = "pattern_$name";
      $self->{patterns}{$name} = $args{patterns}{$name} || $self->$default_method;
   }

   $self->{patterns}{int} = qr/0o[0-7]+|$self->{patterns}{int}/
      if $args{accept_0o_oct};

   # _skip is what gets skipped between tokens: whitespace, plus comments
   # when a comment pattern is defined
   $self->{patterns}{_skip} = defined( $self->{patterns}{comment} )
      ? qr/$self->{patterns}{ws}|$self->{patterns}{comment}/
      : $self->{patterns}{ws};

   return $self;
}
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head1 METHODS |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=cut |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head2 from_string |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
$result = $parser->from_string( $str ) |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Parse the given literal string and return the result from the toplevel method. |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=cut |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# Parses $str from its start using the configured toplevel method and returns
# that method's (scalar) result. Fails if input remains after the toplevel
# method returns.
sub from_string
{
   my ( $self, $str ) = @_;

   $self->{str} = $str;
   pos( $self->{str} ) = 0;

   my $method = $self->{toplevel};
   my $result = $self->$method;

   $self->fail( "Expected end of input" ) unless $self->at_eos;

   return $result;
}
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head2 from_file |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
$result = $parser->from_file( $file, %opts ) |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Parse the given file, which may be a pathname in a string, or an opened IO |
|
237
|
|
|
|
|
|
|
handle, and return the result from the toplevel method. |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
The following options are recognised: |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=over 8 |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=item binmode => STRING |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
If set, applies the given binmode to the filehandle before reading. Typically |
|
246
|
|
|
|
|
|
|
this can be used to set the encoding of the file. |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
$parser->from_file( $file, binmode => ":encoding(UTF-8)" ) |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=back |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=cut |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Parses the content of $file and returns the toplevel result. $file is
# either a path (opened here for reading) or a reference, which is used
# directly as the filehandle. Recognised option:
#   binmode - if true, applied to the handle with binmode before reading
sub from_file
{
   my ( $self, $file, %opts ) = @_;

   croak "Expected a filename to ->from_file" unless defined $file;

   # Remembered so the filename accessor can report it
   $self->{filename} = $file;

   my $fh;
   if( !ref $file ) {
      open $fh, "<", $file or die "Cannot open $file for reading - $!";
   }
   else {
      $fh = $file;
   }

   $opts{binmode} and binmode $fh, $opts{binmode};

   # Slurp the whole handle: $/ is undef for the duration of the read
   return $self->from_string( do { local $/; <$fh> } );
}
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=head2 filename |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
$filename = $parser->filename |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
I |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Returns the name of the file currently being parsed, if invoked from within |
|
283
|
|
|
|
|
|
|
L</from_file>. |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=cut |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
# Accessor for the value stored by from_file; undef if none has been stored.
sub filename
{
   my ( $self ) = @_;
   return $self->{filename};
}
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head2 from_reader |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
$result = $parser->from_reader( \&reader ) |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
I |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
Parse the input which is read by the C<reader> function. This function will be |
|
300
|
|
|
|
|
|
|
called in scalar context to generate portions of string to parse, being passed |
|
301
|
|
|
|
|
|
|
the C<$parser> object. The function should return C<undef> when it has no more |
|
302
|
|
|
|
|
|
|
string to return. |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
$reader->( $parser ) |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Note that because it is not generally possible to detect exactly when more |
|
307
|
|
|
|
|
|
|
input may be required due to failed regexp parsing, the reader function is |
|
308
|
|
|
|
|
|
|
only invoked during searching for skippable whitespace. This makes it suitable |
|
309
|
|
|
|
|
|
|
for reading lines of a file in the common case where lines are considered as |
|
310
|
|
|
|
|
|
|
skippable whitespace, or for reading lines of input interactively from a |
|
311
|
|
|
|
|
|
|
user. It cannot be used in all cases (for example, reading fixed-size buffers |
|
312
|
|
|
|
|
|
|
from a file) because two successive invocations may split a single token |
|
313
|
|
|
|
|
|
|
across the buffer boundaries, and cause parse failures. |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=cut |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# Parses input supplied incrementally by the $reader code reference and
# returns the toplevel method's result. The reader is stored in
# $self->{reader} only for the duration of this call; the input buffer
# starts out empty. Fails if input remains after the toplevel method returns.
sub from_reader
{
   my $self = shift;
   my ( $reader ) = @_;

   local $self->{reader} = $reader;

   $self->{str} = "";
   pos $self->{str} = 0;

   # Honour the toplevel method chosen at construction time, as from_string
   # does, rather than assuming it is always called "parse". The fallback
   # keeps objects that never had a toplevel stored working as before.
   my $toplevel = $self->{toplevel} // "parse";
   my $result = $self->$toplevel;

   $self->at_eos or
      $self->fail( "Expected end of input" );

   return $result;
}
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 pos |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
$pos = $parser->pos |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
I |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Returns the current parse position, as a character offset from the beginning |
|
342
|
|
|
|
|
|
|
of the file or string. |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=cut |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# Returns the current match position of the input string, as reported by
# the core pos() function.
sub pos
{
   my ( $self ) = @_;
   return CORE::pos( $self->{str} );
}
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=head2 take |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
$str = $parser->take( $len ) |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
I |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Returns the next C<$len> characters directly from the input, prior to any |
|
359
|
|
|
|
|
|
|
whitespace or comment skipping. This does I<not> take account of any |
|
360
|
|
|
|
|
|
|
end-of-scope marker that may be pending. It is intended for use by parsers of |
|
361
|
|
|
|
|
|
|
partially-binary protocols, or other situations in which it would be incorrect |
|
362
|
|
|
|
|
|
|
for the end-of-scope marker to take effect at this time. |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=cut |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# Returns the next $len characters of the input starting at the current
# position, and advances the position by $len. No whitespace skipping or
# end-of-scope checking is performed here.
sub take
{
   my ( $self, $len ) = @_;

   my $from  = pos( $self->{str} );
   my $taken = substr( $self->{str}, $from, $len );

   pos( $self->{str} ) += $len;

   return $taken;
}
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=head2 where |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
( $lineno, $col, $text ) = $parser->where |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Returns the current parse position, as a line and column number, and |
|
383
|
|
|
|
|
|
|
the entire current line of text. The first line is numbered 1, and the first |
|
384
|
|
|
|
|
|
|
column is numbered 0. |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=cut |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# Returns ( $lineno, $col, $line ) for position $pos, defaulting to the
# current parse position. Lines are numbered from 1 and columns from 0;
# $line is the text of the containing line without its terminator.
# Both \r and \n delimit a line's text, but only \n advances the line number.
sub where
{
   my ( $self, $pos ) = @_;
   $pos //= pos $self->{str};

   my $text   = $self->{str};
   my $is_eol = sub { $_[0] =~ m/^[\r\n]$/ };

   # Scan backwards for the start of the line. When sitting exactly on a
   # line terminator (and not at offset 0), step back over it first so the
   # position is attributed to the line that terminator ends.
   my $line_start = $pos;
   if( $line_start > 0 and $is_eol->( substr( $text, $line_start, 1 ) ) ) {
      $line_start--;
   }
   while( $line_start > 0 and !$is_eol->( substr( $text, $line_start - 1, 1 ) ) ) {
      $line_start--;
   }

   # Scan forwards to the terminator (or the end of the string)
   my $line_end = $pos;
   while( $line_end < length( $text ) and !$is_eol->( substr( $text, $line_end, 1 ) ) ) {
      $line_end++;
   }

   my $line = substr( $text, $line_start, $line_end - $line_start );
   my $col  = $pos - $line_start;

   # One more than the number of newlines before $pos
   my $before = substr( $text, 0, $pos );
   my $lineno = ( $before =~ tr/\n// ) + 1;

   return ( $lineno, $col, $line );
}
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 fail |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 fail_from |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
$parser->fail( $message ) |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
$parser->fail_from( $pos, $message ) |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
I<C<fail_from> since version 0.09.> |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Aborts the current parse attempt with the given message string. The failure |
|
423
|
|
|
|
|
|
|
message will include the line and column position, and the line of input that |
|
424
|
|
|
|
|
|
|
failed at the current parse position (C<fail>), or a position earlier obtained |
|
425
|
|
|
|
|
|
|
using the C<pos> method (C<fail_from>). |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
This failure will propagate up to the inner-most structure parsing method that |
|
428
|
|
|
|
|
|
|
has not been committed; or will cause the entire parser to fail if there are |
|
429
|
|
|
|
|
|
|
no further options to take. |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=cut |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# Raises a parse failure carrying $message at the current parse position,
# by delegating to fail_from.
sub fail
{
   my ( $self, $message ) = @_;
   return $self->fail_from( $self->pos, $message );
}
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
# Raises a parse failure for position $pos: throws a Parser::MGC::Failure
# object constructed from the message, this parser and the position.
sub fail_from
{
   my ( $self, $pos, $message ) = @_;

   my $failure = Parser::MGC::Failure->new( $message, $self, $pos );
   die $failure;
}
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
# _isa_failure( $e ) is true when $e is an instance of Parser::MGC::Failure.
#
# Perl 5.32 and later provide the `isa` infix operator, but source using it
# fails to parse on older perls; so that variant is compiled from a string
# only when the running perl supports it, with a Scalar::Util-based
# implementation used otherwise.
{
   my $checker;

   if( $^V ge v5.32 ) {
      $checker = eval 'use experimental "isa"; sub { $_[0] isa Parser::MGC::Failure }';
      defined $checker or die $@;
   }
   else {
      require Scalar::Util;
      $checker = sub { Scalar::Util::blessed($_[0]) and $_[0]->isa( "Parser::MGC::Failure" ) };
   }

   *_isa_failure = $checker;
}
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 die |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 die_from |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
$parser->die( $message ) |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
$parser->die_from( $pos, $message ) |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
I |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Throws an exception that propagates as normal for C<die>, entirely out of the |
|
466
|
|
|
|
|
|
|
entire parser and to the caller of the toplevel C<from_*> method that invoked |
|
467
|
|
|
|
|
|
|
it, bypassing all of the back-tracking logic. |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This is much like using core's C<die> directly, except that the message string |
|
470
|
|
|
|
|
|
|
will include the line and column position, and the line of input that the |
|
471
|
|
|
|
|
|
|
parser was working on, as it does in the L</fail> method. |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
This method is intended for reporting fatal errors where the parsed input was |
|
474
|
|
|
|
|
|
|
correctly recognised at a grammar level, but is requesting something that |
|
475
|
|
|
|
|
|
|
cannot be fulfilled semantically. |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=cut |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# Throws a fatal error carrying $message at the current parse position,
# by delegating to die_from.
sub die :method
{
   my ( $self, $message ) = @_;
   return $self->die_from( $self->pos, $message );
}
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
# Throws a fatal error for position $pos. A Parser::MGC::Failure object is
# built only to obtain its ->STRING rendering; the resulting string is what
# gets thrown, not the object itself.
sub die_from
{
   my ( $self, $pos, $message ) = @_;

   my $report = Parser::MGC::Failure->new( $message, $self, $pos )->STRING;
   die $report;
}
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head2 at_eos |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
$eos = $parser->at_eos |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Returns true if the input string is at the end of the string. |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=cut |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# Returns true if, after skipping whitespace, the parse position is at the
# end of the input string or (when an end-of-scope pattern is set) the
# end-of-scope pattern matches there. The parse position is left unchanged.
sub at_eos
{
   my ( $self ) = @_;

   # Remember pos() so the whitespace skip below is not visible to callers
   # (e.g. the substring_before method)
   my $saved_pos = pos $self->{str};

   $self->skip_ws;

   my $at_eos =
        ( pos( $self->{str} ) >= length( $self->{str} ) ) ? 1
      : defined( $self->{endofscope} ) ? scalar( $self->{str} =~ m/\G$self->{endofscope}/ )
      :                                  0;

   pos( $self->{str} ) = $saved_pos;

   return $at_eos;
}
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=head2 scope_level |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
$level = $parser->scope_level |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
I |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
Returns the number of nested C<scope_of> calls that have been made. |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=cut |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
# Accessor for the current scope nesting depth counter.
sub scope_level
{
   my ( $self ) = @_;
   return $self->{scope_level};
}
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=head1 STRUCTURE-FORMING METHODS |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
The following methods may be used to build a grammatical structure out of the |
|
547
|
|
|
|
|
|
|
defined basic token-parsing methods. Each takes at least one code reference, |
|
548
|
|
|
|
|
|
|
which will be passed the actual C<$parser> object as its first argument. |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Anywhere that a code reference is expected also permits a plain string giving |
|
551
|
|
|
|
|
|
|
the name of a method to invoke. This is sufficient in many simple cases, such |
|
552
|
|
|
|
|
|
|
as |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
$self->any_of( |
|
555
|
|
|
|
|
|
|
'token_int', |
|
556
|
|
|
|
|
|
|
'token_string', |
|
557
|
|
|
|
|
|
|
... |
|
558
|
|
|
|
|
|
|
); |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=cut |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=head2 maybe |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
$ret = $parser->maybe( $code ) |
|
565
|
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
Attempts to execute the given C<$code> in scalar context, and returns what it |
|
567
|
|
|
|
|
|
|
returned, accepting that it might fail. C<$code> may either be a CODE |
|
568
|
|
|
|
|
|
|
reference or a method name given as a string. |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
If the code fails (either by calling C<fail> itself, or by propagating a |
|
571
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C<commit>, then |
|
572
|
|
|
|
|
|
|
none of the input string will be consumed; the current parsing position will |
|
573
|
|
|
|
|
|
|
be restored. C<undef> will be returned in this case. |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
If it calls C<commit> then any subsequent failure will be propagated to the |
|
576
|
|
|
|
|
|
|
caller, rather than returning C<undef>. |
|
577
|
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
This may be considered to be similar to the C<?> regexp qualifier. |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
sub parse_declaration |
|
581
|
|
|
|
|
|
|
{ |
|
582
|
|
|
|
|
|
|
my $self = shift; |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
[ $self->parse_type, |
|
585
|
|
|
|
|
|
|
$self->token_ident, |
|
586
|
|
|
|
|
|
|
$self->maybe( sub { |
|
587
|
|
|
|
|
|
|
$self->expect( "=" ); |
|
588
|
|
|
|
|
|
|
$self->parse_expression |
|
589
|
|
|
|
|
|
|
} ), |
|
590
|
|
|
|
|
|
|
]; |
|
591
|
|
|
|
|
|
|
} |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=cut |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
# Runs $code (a CODE reference or method name) as a method on the parser
# and returns what it returned. If it throws a parse failure before the
# committer has been invoked, the parse position is restored and undef is
# returned instead. Any other exception, or a failure after commit, is
# rethrown (with the position still restored).
sub maybe
{
   my ( $self, $code ) = @_;

   my $start_pos = pos $self->{str};

   # Installed for the duration of this call; invoking it marks the attempt
   # as committed
   my $is_committed = 0;
   local $self->{committer} = sub { $is_committed++ };

   try {
      return $self->$code;
   }
   catch ( $failure ) {
      pos( $self->{str} ) = $start_pos;

      return undef if !$is_committed and _isa_failure( $failure );

      die $failure;
   }
}
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=head2 scope_of |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
$ret = $parser->scope_of( $start, $code, $stop ) |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Expects to find the C<$start> pattern, then attempts to execute the given |
|
621
|
|
|
|
|
|
|
C<$code>, then expects to find the C<$stop> pattern. Returns whatever the |
|
622
|
|
|
|
|
|
|
code returned. C<$code> may either be a CODE reference or a method name given |
|
623
|
|
|
|
|
|
|
as a string. |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
While the code is being executed, the C<$stop> pattern will be used by the |
|
626
|
|
|
|
|
|
|
token parsing methods as an end-of-scope marker; causing them to raise a |
|
627
|
|
|
|
|
|
|
failure if called at the end of a scope. |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
sub parse_block |
|
630
|
|
|
|
|
|
|
{ |
|
631
|
|
|
|
|
|
|
my $self = shift; |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
$self->scope_of( "{", 'parse_statements', "}" ); |
|
634
|
|
|
|
|
|
|
} |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
If the C<$start> pattern is undefined, it is presumed the caller has already |
|
637
|
|
|
|
|
|
|
checked for this. This is useful when the stop pattern needs to be calculated |
|
638
|
|
|
|
|
|
|
based on the start pattern. |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
sub parse_bracketed |
|
641
|
|
|
|
|
|
|
{ |
|
642
|
|
|
|
|
|
|
my $self = shift; |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
my $delim = $self->expect( qr/[\(\[\<\{]/ ); |
|
645
|
|
|
|
|
|
|
$delim =~ tr/([<{/)]>}/; |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
$self->scope_of( undef, 'parse_body', $delim ); |
|
648
|
|
|
|
|
|
|
} |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
This method does not have any optional parts to it; any failures are |
|
651
|
|
|
|
|
|
|
immediately propagated to the caller. |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=cut |
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# Public entry point: runs _scope_of without committing after the start
# pattern. Arguments are ( $start, $code, $stop ).
sub scope_of
{
   my ( $self, @args ) = @_;
   return $self->_scope_of( 0, @args );
}
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
# Shared implementation of scope_of and committed_scope_of.
#   $commit_if_started - if true, ->commit is called once the start pattern
#                        has been handled
#   $start             - pattern to expect first; skipped when undef
#   $code              - CODE reference or method name producing the result
#   $stop              - pattern to expect afterwards; a non-reference value
#                        is matched literally
# While $code runs, $stop is installed as the end-of-scope pattern and the
# scope level is one higher; both are restored when this call exits.
sub _scope_of
{
   my ( $self, $commit_if_started, $start, $code, $stop ) = @_;

   $stop = qr/\Q$stop/ unless ref $stop;

   defined $start and $self->expect( $start );

   $commit_if_started and $self->commit;

   local $self->{endofscope}  = $stop;
   local $self->{scope_level} = $self->{scope_level} + 1;

   my $ret = $self->$code;

   $self->expect( $stop );

   return $ret;
}
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=head2 committed_scope_of |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
$ret = $parser->committed_scope_of( $start, $code, $stop ) |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
I |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
A variant of L</scope_of> that calls L</commit> after a successful match of |
|
689
|
|
|
|
|
|
|
the start pattern. This is usually what you want if using C<scope_of> from |
|
690
|
|
|
|
|
|
|
within an C<any_of> choice, if no other alternative following this one could |
|
691
|
|
|
|
|
|
|
possibly match if the start pattern has. |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
=cut |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
# Public entry point: runs _scope_of with the commit-after-start flag set.
# Arguments are ( $start, $code, $stop ).
sub committed_scope_of
{
   my ( $self, @args ) = @_;
   return $self->_scope_of( 1, @args );
}
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head2 list_of |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
$ret = $parser->list_of( $sep, $code ) |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Expects to find a list of instances of something parsed by C<$code>, |
|
706
|
|
|
|
|
|
|
separated by the C<$sep> pattern. Returns an ARRAY ref containing a list of |
|
707
|
|
|
|
|
|
|
the return values from the C<$code>. A single trailing delimiter is allowed, |
|
708
|
|
|
|
|
|
|
and does not affect the return value. C<$code> may either be a CODE reference |
|
709
|
|
|
|
|
|
|
or a method name given as a string. It is called in list context, and whatever |
|
710
|
|
|
|
|
|
|
values it returns are appended to the eventual result - similar to perl's |
|
711
|
|
|
|
|
|
|
C<map>. |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
This method does not consider it an error if the returned list is empty; that |
|
714
|
|
|
|
|
|
|
is, that the scope ended before any item instances were parsed from it. |
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
sub parse_numbers |
|
717
|
|
|
|
|
|
|
{ |
|
718
|
|
|
|
|
|
|
my $self = shift; |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
$self->list_of( ",", 'token_int' ); |
|
721
|
|
|
|
|
|
|
} |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
If the code fails (either by invoking C<fail> itself, or by propagating a |
|
724
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C<commit> on a |
|
725
|
|
|
|
|
|
|
particular item, then the item is aborted and the parsing position will be |
|
726
|
|
|
|
|
|
|
restored to the beginning of that failed item. The list of results from |
|
727
|
|
|
|
|
|
|
previous successful attempts will be returned. |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
If it calls C<commit> within an item then any subsequent failure for that item |
|
730
|
|
|
|
|
|
|
will cause the entire C<list_of> to fail, propagating that to the caller. |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=cut |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
sub list_of |
|
735
|
|
|
|
|
|
|
{ |
|
736
|
72
|
|
|
72
|
1
|
128
|
my $self = shift; |
|
737
|
72
|
|
|
|
|
108
|
my ( $sep, $code ) = @_; |
|
738
|
|
|
|
|
|
|
|
|
739
|
72
|
100
|
33
|
|
|
207
|
ref $sep or $sep = qr/\Q$sep/ if defined $sep; |
|
740
|
|
|
|
|
|
|
|
|
741
|
72
|
|
|
|
|
81
|
my $committed; |
|
742
|
72
|
|
|
14
|
|
226
|
local $self->{committer} = sub { $committed++ }; |
|
|
14
|
|
|
|
|
22
|
|
|
743
|
|
|
|
|
|
|
|
|
744
|
72
|
|
|
|
|
99
|
my @ret; |
|
745
|
|
|
|
|
|
|
|
|
746
|
72
|
|
|
|
|
140
|
while( !$self->at_eos ) { |
|
747
|
127
|
|
|
|
|
176
|
$committed = 0; |
|
748
|
127
|
|
|
|
|
154
|
my $pos = pos $self->{str}; |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
try { |
|
751
|
|
|
|
|
|
|
push @ret, $self->$code; |
|
752
|
|
|
|
|
|
|
next; |
|
753
|
|
|
|
|
|
|
} |
|
754
|
|
|
|
|
|
|
catch ( $e ) { |
|
755
|
|
|
|
|
|
|
pos($self->{str}) = $pos; |
|
756
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
last; |
|
759
|
|
|
|
|
|
|
} |
|
760
|
127
|
|
|
|
|
212
|
} |
|
761
|
|
|
|
|
|
|
continue { |
|
762
|
103
|
100
|
|
|
|
224
|
if( defined $sep ) { |
|
763
|
32
|
|
|
|
|
58
|
$self->skip_ws; |
|
764
|
32
|
100
|
|
|
|
145
|
$self->{str} =~ m/\G$sep/gc or last; |
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
} |
|
767
|
|
|
|
|
|
|
|
|
768
|
71
|
|
|
|
|
247
|
return \@ret; |
|
769
|
|
|
|
|
|
|
} |
|
770
|
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=head2 sequence_of |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
$ret = $parser->sequence_of( $code ) |
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
A shortcut for calling C<list_of> with no separator (C<undef>); expects |
|
776
|
|
|
|
|
|
|
to find zero or more instances of something parsed by C<$code>, separated only |
|
777
|
|
|
|
|
|
|
by skipped whitespace. |
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
This may be considered to be similar to the C<+> or C<*> regexp qualifiers. |
|
780
|
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
sub parse_statements |
|
782
|
|
|
|
|
|
|
{ |
|
783
|
|
|
|
|
|
|
my $self = shift; |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
$self->sequence_of( 'parse_statement' ); |
|
786
|
|
|
|
|
|
|
} |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
The interaction of failures in the code and the C<commit> method is identical |
|
789
|
|
|
|
|
|
|
to that of C<list_of>. |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=cut |
|
792
|
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
sub sequence_of |
|
794
|
|
|
|
|
|
|
{ |
|
795
|
56
|
|
|
56
|
1
|
169
|
my $self = shift; |
|
796
|
56
|
|
|
|
|
91
|
my ( $code ) = @_; |
|
797
|
|
|
|
|
|
|
|
|
798
|
56
|
|
|
|
|
125
|
$self->list_of( undef, $code ); |
|
799
|
|
|
|
|
|
|
} |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=head2 any_of |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
$ret = $parser->any_of( @codes ) |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
I |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
Expects that one of the given code instances can parse something from the |
|
808
|
|
|
|
|
|
|
input, returning what it returned. Each code instance may indicate a failure |
|
809
|
|
|
|
|
|
|
to parse by calling the C<fail> method or otherwise propagating a failure. |
|
810
|
|
|
|
|
|
|
Each code instance may either be a CODE reference or a method name given as a |
|
811
|
|
|
|
|
|
|
string. |
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
This may be considered to be similar to the C<|> regexp operator for forming |
|
814
|
|
|
|
|
|
|
alternations of possible parse trees. |
|
815
|
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
sub parse_statement |
|
817
|
|
|
|
|
|
|
{ |
|
818
|
|
|
|
|
|
|
my $self = shift; |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
$self->any_of( |
|
821
|
|
|
|
|
|
|
sub { $self->parse_declaration; $self->expect(";") }, |
|
822
|
|
|
|
|
|
|
sub { $self->parse_expression; $self->expect(";") }, |
|
823
|
|
|
|
|
|
|
sub { $self->parse_block }, |
|
824
|
|
|
|
|
|
|
); |
|
825
|
|
|
|
|
|
|
} |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
If the code for a given choice fails (either by invoking C<fail> itself, or by |
|
828
|
|
|
|
|
|
|
propagating a failure from another method it invoked) before it has invoked |
|
829
|
|
|
|
|
|
|
C<commit> itself, then the parsing position is restored and the next choice will |
|
830
|
|
|
|
|
|
|
be attempted. |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
If it calls C<commit> then any subsequent failure for that choice will cause |
|
833
|
|
|
|
|
|
|
the entire C<any_of> to fail, propagating that to the caller and no further |
|
834
|
|
|
|
|
|
|
choices will be attempted. |
|
835
|
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
If none of the choices match then a simple failure message is printed: |
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
Found nothing parseable |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
As this is unlikely to be helpful to users, a better message can be provided |
|
841
|
|
|
|
|
|
|
by the final choice instead. Don't forget to C<commit> before printing the |
|
842
|
|
|
|
|
|
|
failure message, or it won't count. |
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
$self->any_of( |
|
845
|
|
|
|
|
|
|
'token_int', |
|
846
|
|
|
|
|
|
|
'token_string', |
|
847
|
|
|
|
|
|
|
..., |
|
848
|
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
sub { $self->commit; $self->fail( "Expected an int or string" ) } |
|
850
|
|
|
|
|
|
|
); |
|
851
|
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=cut |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
sub any_of |
|
855
|
|
|
|
|
|
|
{ |
|
856
|
218
|
|
|
218
|
1
|
379
|
my $self = shift; |
|
857
|
|
|
|
|
|
|
|
|
858
|
218
|
|
|
|
|
399
|
while( @_ ) { |
|
859
|
433
|
|
|
|
|
536
|
my $code = shift; |
|
860
|
433
|
|
|
|
|
508
|
my $pos = pos $self->{str}; |
|
861
|
|
|
|
|
|
|
|
|
862
|
433
|
|
|
|
|
457
|
my $committed = 0; |
|
863
|
433
|
|
|
55
|
|
1091
|
local $self->{committer} = sub { $committed++ }; |
|
|
55
|
|
|
|
|
80
|
|
|
864
|
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
try { |
|
866
|
|
|
|
|
|
|
return $self->$code; |
|
867
|
|
|
|
|
|
|
} |
|
868
|
433
|
|
|
|
|
741
|
catch ( $e ) { |
|
869
|
|
|
|
|
|
|
pos( $self->{str} ) = $pos; |
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
|
872
|
|
|
|
|
|
|
} |
|
873
|
|
|
|
|
|
|
} |
|
874
|
|
|
|
|
|
|
|
|
875
|
12
|
|
|
|
|
26
|
$self->fail( "Found nothing parseable" ); |
|
876
|
|
|
|
|
|
|
} |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
sub one_of { |
|
879
|
0
|
|
|
0
|
0
|
0
|
croak "Parser::MGC->one_of is deprecated; use ->any_of instead"; |
|
880
|
|
|
|
|
|
|
} |
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=head2 commit |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
$parser->commit |
|
885
|
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
Calling this method will cancel the backtracking behaviour of the innermost |
|
887
|
|
|
|
|
|
|
C<maybe>, C<any_of>, C<sequence_of>, or C<list_of> structure forming method. |
|
888
|
|
|
|
|
|
|
That is, if later code then calls C<fail>, the exception will be propagated |
|
889
|
|
|
|
|
|
|
out of C<maybe>, no further list items will be attempted by C<list_of> or |
|
890
|
|
|
|
|
|
|
C<sequence_of>, and no further code blocks will be attempted by C<any_of>. |
|
891
|
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
Typically this will be called once the grammatical structure has been |
|
893
|
|
|
|
|
|
|
determined, ensuring that any further failures are raised as real exceptions, |
|
894
|
|
|
|
|
|
|
rather than by attempting other alternatives. |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
sub parse_statement |
|
897
|
|
|
|
|
|
|
{ |
|
898
|
|
|
|
|
|
|
my $self = shift; |
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
$self->any_of( |
|
901
|
|
|
|
|
|
|
... |
|
902
|
|
|
|
|
|
|
sub { |
|
903
|
|
|
|
|
|
|
$self->scope_of( "{", |
|
904
|
|
|
|
|
|
|
sub { $self->commit; $self->parse_statements; }, |
|
905
|
|
|
|
|
|
|
"}" ), |
|
906
|
|
|
|
|
|
|
}, |
|
907
|
|
|
|
|
|
|
); |
|
908
|
|
|
|
|
|
|
} |
|
909
|
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
Though in this common pattern, L</committed_scope_of> may be used instead. |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
=cut |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
sub commit |
|
915
|
|
|
|
|
|
|
{ |
|
916
|
69
|
|
|
69
|
1
|
101
|
my $self = shift; |
|
917
|
69
|
50
|
|
|
|
117
|
if( $self->{committer} ) { |
|
918
|
69
|
|
|
|
|
108
|
$self->{committer}->(); |
|
919
|
|
|
|
|
|
|
} |
|
920
|
|
|
|
|
|
|
else { |
|
921
|
0
|
|
|
|
|
0
|
croak "Cannot commit except within a backtrack-able structure"; |
|
922
|
|
|
|
|
|
|
} |
|
923
|
|
|
|
|
|
|
} |
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
=head1 TOKEN PARSING METHODS |
|
926
|
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
The following methods attempt to consume some part of the input string, to be |
|
928
|
|
|
|
|
|
|
used as part of the parsing process. |
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=cut |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
sub skip_ws |
|
933
|
|
|
|
|
|
|
{ |
|
934
|
1283
|
|
|
1283
|
0
|
1368
|
my $self = shift; |
|
935
|
|
|
|
|
|
|
|
|
936
|
1283
|
|
|
|
|
1476
|
my $pattern = $self->{patterns}{_skip}; |
|
937
|
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
{ |
|
939
|
1283
|
|
|
|
|
1283
|
1 while $self->{str} =~ m/\G$pattern/gc; |
|
|
1286
|
|
|
|
|
5533
|
|
|
940
|
|
|
|
|
|
|
|
|
941
|
1286
|
100
|
|
|
|
2800
|
return if pos( $self->{str} ) < length $self->{str}; |
|
942
|
|
|
|
|
|
|
|
|
943
|
222
|
100
|
|
|
|
483
|
return unless $self->{reader}; |
|
944
|
|
|
|
|
|
|
|
|
945
|
4
|
|
|
|
|
9
|
my $more = $self->{reader}->( $self ); |
|
946
|
4
|
100
|
|
|
|
12
|
if( defined $more ) { |
|
947
|
3
|
|
|
|
|
5
|
my $pos = pos( $self->{str} ); |
|
948
|
3
|
|
|
|
|
7
|
$self->{str} .= $more; |
|
949
|
3
|
|
|
|
|
4
|
pos( $self->{str} ) = $pos; |
|
950
|
|
|
|
|
|
|
|
|
951
|
3
|
|
|
|
|
7
|
redo; |
|
952
|
|
|
|
|
|
|
} |
|
953
|
|
|
|
|
|
|
|
|
954
|
1
|
|
|
|
|
3
|
undef $self->{reader}; |
|
955
|
1
|
|
|
|
|
1
|
return; |
|
956
|
|
|
|
|
|
|
} |
|
957
|
|
|
|
|
|
|
} |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=head2 expect |
|
960
|
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
$str = $parser->expect( $literal ) |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
$str = $parser->expect( qr/pattern/ ) |
|
964
|
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
@groups = $parser->expect( qr/pattern/ ) |
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
Expects to find a literal string or regexp pattern match, and consumes it. |
|
968
|
|
|
|
|
|
|
In scalar context, this method returns the string that was captured. In list |
|
969
|
|
|
|
|
|
|
context it returns the matching substring and the contents of any subgroups |
|
970
|
|
|
|
|
|
|
contained in the pattern. |
|
971
|
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
This method will raise a parse error (by calling C<fail>) if the regexp fails |
|
973
|
|
|
|
|
|
|
to match. Note that if the pattern could match an empty string (such as for |
|
974
|
|
|
|
|
|
|
example C), the pattern will always match, even if it has to match an |
|
975
|
|
|
|
|
|
|
empty string. This method will not consider it a failure if the regexp matches |
|
976
|
|
|
|
|
|
|
with zero-width. |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=head2 maybe_expect |
|
979
|
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
$str = $parser->maybe_expect( ... ) |
|
981
|
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
@groups = $parser->maybe_expect( ... ) |
|
983
|
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
I |
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
A convenient shortcut equivalent to calling C<expect> within C<maybe>, but |
|
987
|
|
|
|
|
|
|
implemented more efficiently, avoiding the exception-handling set up by |
|
988
|
|
|
|
|
|
|
C<maybe>. Returns C<undef> or an empty list if the match fails. |
|
989
|
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
=cut |
|
991
|
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
sub maybe_expect |
|
993
|
|
|
|
|
|
|
{ |
|
994
|
371
|
|
|
371
|
1
|
461
|
my $self = shift; |
|
995
|
371
|
|
|
|
|
482
|
my ( $expect ) = @_; |
|
996
|
|
|
|
|
|
|
|
|
997
|
371
|
100
|
|
|
|
624
|
ref $expect or $expect = qr/\Q$expect/; |
|
998
|
|
|
|
|
|
|
|
|
999
|
371
|
|
|
|
|
637
|
$self->skip_ws; |
|
1000
|
371
|
100
|
|
|
|
3512
|
$self->{str} =~ m/\G$expect/gc or return; |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
210
|
100
|
|
|
|
1098
|
return substr( $self->{str}, $-[0], $+[0]-$-[0] ) if !wantarray; |
|
1003
|
22
|
100
|
|
|
|
56
|
return map { defined $-[$_] ? substr( $self->{str}, $-[$_], $+[$_]-$-[$_] ) : undef } 0 .. $#+; |
|
|
37
|
|
|
|
|
226
|
|
|
1004
|
|
|
|
|
|
|
} |
|
1005
|
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
sub expect |
|
1007
|
|
|
|
|
|
|
{ |
|
1008
|
355
|
|
|
355
|
1
|
544
|
my $self = shift; |
|
1009
|
355
|
|
|
|
|
507
|
my ( $expect ) = @_; |
|
1010
|
|
|
|
|
|
|
|
|
1011
|
355
|
100
|
|
|
|
2156
|
ref $expect or $expect = qr/\Q$expect/; |
|
1012
|
|
|
|
|
|
|
|
|
1013
|
355
|
100
|
|
|
|
657
|
if( wantarray ) { |
|
1014
|
31
|
100
|
|
|
|
64
|
my @ret = $self->maybe_expect( $expect ) or |
|
1015
|
|
|
|
|
|
|
$self->fail( "Expected $expect" ); |
|
1016
|
20
|
|
|
|
|
80
|
return @ret; |
|
1017
|
|
|
|
|
|
|
} |
|
1018
|
|
|
|
|
|
|
else { |
|
1019
|
324
|
100
|
|
|
|
553
|
defined( my $ret = $self->maybe_expect( $expect ) ) or |
|
1020
|
|
|
|
|
|
|
$self->fail( "Expected $expect" ); |
|
1021
|
185
|
|
|
|
|
502
|
return $ret; |
|
1022
|
|
|
|
|
|
|
} |
|
1023
|
|
|
|
|
|
|
} |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
=head2 substring_before |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
$str = $parser->substring_before( $literal ) |
|
1028
|
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
$str = $parser->substring_before( qr/pattern/ ) |
|
1030
|
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
I |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
Expects to possibly find a literal string or regexp pattern match. If it finds |
|
1034
|
|
|
|
|
|
|
such, consume all the input text before but excluding this match, and return |
|
1035
|
|
|
|
|
|
|
it. If it fails to find a match before the end of the current scope, consumes |
|
1036
|
|
|
|
|
|
|
all the input text until the end of scope and return it. |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
This method does not consume the part of input that matches, only the text |
|
1039
|
|
|
|
|
|
|
before it. It is not considered a failure if the substring before this match |
|
1040
|
|
|
|
|
|
|
is empty. If a non-empty match is required, use the C<fail> method: |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
sub token_nonempty_part |
|
1043
|
|
|
|
|
|
|
{ |
|
1044
|
|
|
|
|
|
|
my $self = shift; |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
my $str = $self->substring_before( "," ); |
|
1047
|
|
|
|
|
|
|
length $str or $self->fail( "Expected a string fragment before ," ); |
|
1048
|
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
return $str; |
|
1050
|
|
|
|
|
|
|
} |
|
1051
|
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
Note that unlike most of the other token parsing methods, this method does not |
|
1053
|
|
|
|
|
|
|
consume either leading or trailing whitespace around the substring. It is |
|
1054
|
|
|
|
|
|
|
expected that this method would be used as part of a parser to read quoted |
|
1055
|
|
|
|
|
|
|
strings, or similar cases where whitespace should be preserved. |
|
1056
|
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
=head2 nonempty_substring_before |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
$str = $parser->nonempty_substring_before( $literal ) |
|
1060
|
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
$str = $parser->nonempty_substring_before( qr/pattern/ ) |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
I |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
A variant of L</substring_before> which fails if the matched part is empty. |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
The example above could have been written: |
|
1068
|
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
sub token_nonempty_part |
|
1070
|
|
|
|
|
|
|
{ |
|
1071
|
|
|
|
|
|
|
my $self = shift; |
|
1072
|
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
return $self->nonempty_substring_before( "," ); |
|
1074
|
|
|
|
|
|
|
} |
|
1075
|
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
This is often useful for breaking out of repeating loops; e.g. |
|
1077
|
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
sub token_escaped_string |
|
1079
|
|
|
|
|
|
|
{ |
|
1080
|
|
|
|
|
|
|
my $self = shift; |
|
1081
|
|
|
|
|
|
|
$self->expect( '"' ); |
|
1082
|
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
my $ret = ""; |
|
1084
|
|
|
|
|
|
|
1 while $self->any_of( |
|
1085
|
|
|
|
|
|
|
sub { $ret .= $self->nonempty_substring_before( qr/%|$/m ); 1 }, |
|
1086
|
|
|
|
|
|
|
sub { my $escape = ( $self->expect( qr/%(.)/ ) )[1]; |
|
1087
|
|
|
|
|
|
|
$ret .= _handle_escape( $escape ); |
|
1088
|
|
|
|
|
|
|
1 }, |
|
1089
|
|
|
|
|
|
|
sub { 0 }, |
|
1090
|
|
|
|
|
|
|
); |
|
1091
|
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
return $ret; |
|
1093
|
|
|
|
|
|
|
} |
|
1094
|
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
=cut |
|
1096
|
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
sub _substring_before |
|
1098
|
|
|
|
|
|
|
{ |
|
1099
|
41
|
|
|
41
|
|
50
|
my $self = shift; |
|
1100
|
41
|
|
|
|
|
65
|
my ( $expect, $fail_if_empty ) = @_; |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
41
|
100
|
|
|
|
150
|
ref $expect or $expect = qr/\Q$expect/; |
|
1103
|
|
|
|
|
|
|
|
|
1104
|
41
|
100
|
|
|
|
131
|
my $endre = ( defined $self->{endofscope} ) ? |
|
1105
|
|
|
|
|
|
|
qr/$expect|$self->{endofscope}/ : |
|
1106
|
|
|
|
|
|
|
$expect; |
|
1107
|
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
# NO skip_ws |
|
1109
|
|
|
|
|
|
|
|
|
1110
|
41
|
|
|
|
|
59
|
my $start = pos $self->{str}; |
|
1111
|
41
|
|
|
|
|
43
|
my $end; |
|
1112
|
41
|
100
|
|
|
|
422
|
if( $self->{str} =~ m/\G(?s:.*?)($endre)/ ) { |
|
1113
|
38
|
|
|
|
|
94
|
$end = $-[1]; |
|
1114
|
|
|
|
|
|
|
} |
|
1115
|
|
|
|
|
|
|
else { |
|
1116
|
3
|
|
|
|
|
8
|
$end = length $self->{str}; |
|
1117
|
|
|
|
|
|
|
} |
|
1118
|
|
|
|
|
|
|
|
|
1119
|
41
|
100
|
66
|
|
|
106
|
$self->fail( "Expected to find a non-empty substring before $expect" ) |
|
1120
|
|
|
|
|
|
|
if $fail_if_empty and $end == $start; |
|
1121
|
|
|
|
|
|
|
|
|
1122
|
40
|
|
|
|
|
92
|
return $self->take( $end - $start ); |
|
1123
|
|
|
|
|
|
|
} |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
sub substring_before |
|
1126
|
|
|
|
|
|
|
{ |
|
1127
|
40
|
|
|
40
|
1
|
63
|
my $self = shift; |
|
1128
|
40
|
|
|
|
|
71
|
return $self->_substring_before( $_[0], 0 ); |
|
1129
|
|
|
|
|
|
|
} |
|
1130
|
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
sub nonempty_substring_before |
|
1132
|
|
|
|
|
|
|
{ |
|
1133
|
1
|
|
|
1
|
1
|
9
|
my $self = shift; |
|
1134
|
1
|
|
|
|
|
13
|
return $self->_substring_before( $_[0], 1 ); |
|
1135
|
|
|
|
|
|
|
} |
|
1136
|
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
=head2 generic_token |
|
1138
|
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
$val = $parser->generic_token( $name, $re, $convert ) |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
I |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
Expects to find a token matching the precompiled regexp C<$re>. If provided, |
|
1144
|
|
|
|
|
|
|
the C<$convert> CODE reference can be used to convert the string into a more |
|
1145
|
|
|
|
|
|
|
convenient form. C<$name> is used in the failure message if the pattern fails |
|
1146
|
|
|
|
|
|
|
to match. |
|
1147
|
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
If provided, the C<$convert> function will be passed the parser and the |
|
1149
|
|
|
|
|
|
|
matching substring; the value it returns is returned from C<generic_token>. |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
$convert->( $parser, $substr ) |
|
1152
|
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
If not provided, the substring will be returned as it stands. |
|
1154
|
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
This method is mostly provided for subclasses to define their own token types. |
|
1156
|
|
|
|
|
|
|
For example: |
|
1157
|
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
sub token_hex |
|
1159
|
|
|
|
|
|
|
{ |
|
1160
|
|
|
|
|
|
|
my $self = shift; |
|
1161
|
|
|
|
|
|
|
$self->generic_token( hex => qr/[0-9A-F]{2}h/, sub { hex $_[1] } ); |
|
1162
|
|
|
|
|
|
|
} |
|
1163
|
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
=cut |
|
1165
|
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
sub generic_token |
|
1167
|
|
|
|
|
|
|
{ |
|
1168
|
230
|
|
|
230
|
1
|
273
|
my $self = shift; |
|
1169
|
230
|
|
|
|
|
359
|
my ( $name, $re, $convert ) = @_; |
|
1170
|
|
|
|
|
|
|
|
|
1171
|
230
|
50
|
|
|
|
377
|
$self->fail( "Expected $name" ) if $self->at_eos; |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
230
|
|
|
|
|
432
|
$self->skip_ws; |
|
1174
|
230
|
100
|
|
|
|
2985
|
$self->{str} =~ m/\G$re/gc or |
|
1175
|
|
|
|
|
|
|
$self->fail( "Expected $name" ); |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
169
|
|
|
|
|
762
|
my $match = substr( $self->{str}, $-[0], $+[0] - $-[0] ); |
|
1178
|
|
|
|
|
|
|
|
|
1179
|
169
|
100
|
|
|
|
558
|
return $convert ? $convert->( $self, $match ) : $match; |
|
1180
|
|
|
|
|
|
|
} |
|
1181
|
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
sub _token_generic |
|
1183
|
|
|
|
|
|
|
{ |
|
1184
|
226
|
|
|
226
|
|
270
|
my $self = shift; |
|
1185
|
226
|
|
|
|
|
559
|
my %args = @_; |
|
1186
|
|
|
|
|
|
|
|
|
1187
|
226
|
|
|
|
|
311
|
my $name = $args{name}; |
|
1188
|
226
|
50
|
|
|
|
501
|
my $re = $args{pattern} ? $self->{patterns}{ $args{pattern} } : $args{re}; |
|
1189
|
226
|
|
|
|
|
257
|
my $convert = $args{convert}; |
|
1190
|
|
|
|
|
|
|
|
|
1191
|
226
|
|
|
|
|
433
|
$self->generic_token( $name, $re, $convert ); |
|
1192
|
|
|
|
|
|
|
} |
|
1193
|
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
=head2 token_int |
|
1195
|
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
$int = $parser->token_int |
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
Expects to find an integer in decimal, octal or hexadecimal notation, and |
|
1199
|
|
|
|
|
|
|
consumes it. Negative integers, preceded by C<->, are also recognised. |
|
1200
|
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
=cut |
|
1202
|
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
sub token_int |
|
1204
|
|
|
|
|
|
|
{ |
|
1205
|
134
|
|
|
134
|
1
|
289
|
my $self = shift; |
|
1206
|
|
|
|
|
|
|
$self->_token_generic( |
|
1207
|
|
|
|
|
|
|
name => "int", |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
pattern => "int", |
|
1210
|
|
|
|
|
|
|
convert => sub { |
|
1211
|
104
|
|
|
104
|
|
151
|
my $int = $_[1]; |
|
1212
|
104
|
100
|
|
|
|
213
|
my $sign = ( $int =~ s/^-// ) ? -1 : 1; |
|
1213
|
|
|
|
|
|
|
|
|
1214
|
104
|
|
|
|
|
126
|
$int =~ s/^0o/0/; |
|
1215
|
|
|
|
|
|
|
|
|
1216
|
104
|
100
|
|
|
|
227
|
return $sign * oct $int if $int =~ m/^0/; |
|
1217
|
99
|
|
|
|
|
659
|
return $sign * $int; |
|
1218
|
|
|
|
|
|
|
}, |
|
1219
|
134
|
|
|
|
|
463
|
); |
|
1220
|
|
|
|
|
|
|
} |
|
1221
|
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
=head2 token_float |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
$float = $parser->token_float |
|
1225
|
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
I |
|
1227
|
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
Expects to find a number expressed in floating-point notation; a sequence of |
|
1229
|
|
|
|
|
|
|
digits possibly prefixed by C<->, possibly containing a decimal point, |
|
1230
|
|
|
|
|
|
|
possibly followed by an exponent specified by C<e> followed by an integer. The |
|
1231
|
|
|
|
|
|
|
numerical value is then returned. |
|
1232
|
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
=cut |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
sub token_float |
|
1236
|
|
|
|
|
|
|
{ |
|
1237
|
20
|
|
|
20
|
1
|
59
|
my $self = shift; |
|
1238
|
|
|
|
|
|
|
$self->_token_generic( |
|
1239
|
|
|
|
|
|
|
name => "float", |
|
1240
|
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
pattern => "float", |
|
1242
|
18
|
|
|
18
|
|
122
|
convert => sub { $_[1] + 0 }, |
|
1243
|
20
|
|
|
|
|
76
|
); |
|
1244
|
|
|
|
|
|
|
} |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
=head2 token_number |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
$number = $parser->token_number |
|
1249
|
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
I |
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
Expects to find a number expressed in either of the above forms. |
|
1253
|
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
=cut |
|
1255
|
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
sub token_number |
|
1257
|
|
|
|
|
|
|
{ |
|
1258
|
7
|
|
|
7
|
1
|
28
|
my $self = shift; |
|
1259
|
7
|
|
|
|
|
21
|
$self->any_of( \&token_float, \&token_int ); |
|
1260
|
|
|
|
|
|
|
} |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=head2 token_string |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
$str = $parser->token_string |
|
1265
|
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
Expects to find a quoted string, and consumes it. The string should be quoted |
|
1267
|
|
|
|
|
|
|
using C<"> or C<'> quote marks. |
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
The content of the quoted string can contain character escapes similar to |
|
1270
|
|
|
|
|
|
|
those accepted by C or Perl. Specifically, the following forms are recognised: |
|
1271
|
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
\a Bell ("alert") |
|
1273
|
|
|
|
|
|
|
\b Backspace |
|
1274
|
|
|
|
|
|
|
\e Escape |
|
1275
|
|
|
|
|
|
|
\f Form feed |
|
1276
|
|
|
|
|
|
|
\n Newline |
|
1277
|
|
|
|
|
|
|
\r Return |
|
1278
|
|
|
|
|
|
|
\t Horizontal Tab |
|
1279
|
|
|
|
|
|
|
\0, \012 Octal character |
|
1280
|
|
|
|
|
|
|
\x34, \x{5678} Hexadecimal character |
|
1281
|
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
C's C<\v> for vertical tab is not supported as it is rarely used in practice |
|
1283
|
|
|
|
|
|
|
and it collides with Perl's C<\v> regexp escape. Perl's C<\c> for forming other |
|
1284
|
|
|
|
|
|
|
control characters is also not supported. |
|
1285
|
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
=cut |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
my %escapes = ( |
|
1289
|
|
|
|
|
|
|
a => "\a", |
|
1290
|
|
|
|
|
|
|
b => "\b", |
|
1291
|
|
|
|
|
|
|
e => "\e", |
|
1292
|
|
|
|
|
|
|
f => "\f", |
|
1293
|
|
|
|
|
|
|
n => "\n", |
|
1294
|
|
|
|
|
|
|
r => "\r", |
|
1295
|
|
|
|
|
|
|
t => "\t", |
|
1296
|
|
|
|
|
|
|
); |
|
1297
|
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
sub token_string |
|
1299
|
|
|
|
|
|
|
{ |
|
1300
|
53
|
|
|
53
|
1
|
137
|
my $self = shift; |
|
1301
|
|
|
|
|
|
|
|
|
1302
|
53
|
100
|
|
|
|
102
|
$self->fail( "Expected string" ) if $self->at_eos; |
|
1303
|
|
|
|
|
|
|
|
|
1304
|
52
|
|
|
|
|
115
|
my $pos = pos $self->{str}; |
|
1305
|
|
|
|
|
|
|
|
|
1306
|
52
|
|
|
|
|
109
|
$self->skip_ws; |
|
1307
|
52
|
100
|
|
|
|
347
|
$self->{str} =~ m/\G($self->{patterns}{string_delim})/gc or |
|
1308
|
|
|
|
|
|
|
$self->fail( "Expected string delimiter" ); |
|
1309
|
|
|
|
|
|
|
|
|
1310
|
32
|
|
|
|
|
89
|
my $delim = $1; |
|
1311
|
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
$self->{str} =~ m/ |
|
1313
|
|
|
|
|
|
|
\G( |
|
1314
|
|
|
|
|
|
|
(?: |
|
1315
|
|
|
|
|
|
|
\\[0-7]{1,3} # octal escape |
|
1316
|
|
|
|
|
|
|
|\\x[0-9A-F]{2} # 2-digit hex escape |
|
1317
|
|
|
|
|
|
|
|\\x\{[0-9A-F]+\} # {}-delimited hex escape |
|
1318
|
|
|
|
|
|
|
|\\. # symbolic escape |
|
1319
|
|
|
|
|
|
|
|[^\\$delim]+ # plain chunk |
|
1320
|
|
|
|
|
|
|
)*? |
|
1321
|
|
|
|
|
|
|
)$delim/gcix or |
|
1322
|
32
|
50
|
|
|
|
780
|
pos($self->{str}) = $pos, $self->fail( "Expected contents of string" ); |
|
1323
|
|
|
|
|
|
|
|
|
1324
|
32
|
|
|
|
|
84
|
my $string = $1; |
|
1325
|
|
|
|
|
|
|
|
|
1326
|
32
|
|
|
|
|
80
|
$string =~ s<\\(?:([0-7]{1,3})|x([0-9A-F]{2})|x\{([0-9A-F]+)\}|(.))> |
|
1327
|
|
|
|
|
|
|
[defined $1 ? chr oct $1 : |
|
1328
|
|
|
|
|
|
|
defined $2 ? chr hex $2 : |
|
1329
|
11
|
50
|
|
|
|
67
|
defined $3 ? chr hex $3 : |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
exists $escapes{$4} ? $escapes{$4} : $4]egi; |
|
1331
|
32
|
|
|
|
|
101
|
|
|
1332
|
|
|
|
|
|
|
return $string; |
|
1333
|
|
|
|
|
|
|
} |
|
1334
|
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
=head2 token_ident |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
$ident = $parser->token_ident |
|
1338
|
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
Expects to find an identifier, and consumes it. |
|
1340
|
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
=cut |
|
1342
|
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
sub token_ident |
|
1344
|
72
|
|
|
72
|
1
|
121
|
{ |
|
1345
|
72
|
|
|
|
|
123
|
my $self = shift; |
|
1346
|
|
|
|
|
|
|
$self->_token_generic( |
|
1347
|
|
|
|
|
|
|
name => "ident", |
|
1348
|
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
pattern => "ident", |
|
1350
|
|
|
|
|
|
|
); |
|
1351
|
|
|
|
|
|
|
} |
|
1352
|
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
=head2 token_kw |
|
1354
|
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
$keyword = $parser->token_kw( @keywords ) |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
Expects to find a keyword, and consumes it. A keyword is defined as an |
|
1358
|
|
|
|
|
|
|
identifier which is exactly one of the literal values passed in. |
|
1359
|
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=cut |
|
1361
|
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
sub token_kw |
|
1363
|
2
|
|
|
2
|
1
|
9
|
{ |
|
1364
|
2
|
|
|
|
|
5
|
my $self = shift; |
|
1365
|
|
|
|
|
|
|
my @acceptable = @_; |
|
1366
|
2
|
|
|
|
|
6
|
|
|
1367
|
|
|
|
|
|
|
$self->skip_ws; |
|
1368
|
2
|
|
|
|
|
3
|
|
|
1369
|
|
|
|
|
|
|
my $pos = pos $self->{str}; |
|
1370
|
2
|
50
|
|
|
|
7
|
|
|
1371
|
|
|
|
|
|
|
defined( my $kw = $self->token_ident ) or |
|
1372
|
|
|
|
|
|
|
return undef; |
|
1373
|
4
|
|
|
|
|
17
|
|
|
1374
|
2
|
100
|
|
|
|
3
|
grep { $_ eq $kw } @acceptable or |
|
1375
|
|
|
|
|
|
|
pos($self->{str}) = $pos, $self->fail( "Expected any of ".join( ", ", @acceptable ) ); |
|
1376
|
1
|
|
|
|
|
3
|
|
|
1377
|
|
|
|
|
|
|
return $kw; |
|
1378
|
|
|
|
|
|
|
} |
|
1379
|
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
package # hide from indexer
   Parser::MGC::Failure;

# Stringification is dispatched by name to the STRING method; the fallback
# setting lets the remaining operators (cmp, eq, etc...) be derived from it.
use overload
   '""'     => "STRING",
   fallback => 1;

# $failure = Parser::MGC::Failure->new( $message, $parser, $pos )
#
# Builds a failure object that records the message text, the parser object it
# relates to, and a position value which STRING later passes to the parser's
# where() method.
sub new
{
   my ( $class, @fields ) = @_;

   my %self;
   @self{qw( message parser pos )} = @fields;

   return bless \%self, $class;
}

# $str = $failure->STRING
#
# Renders the failure as a three-line report: the message with the line number
# (and the filename, when the parser has a plain string one), the text
# returned by where(), and a '^' marker line beneath it.
sub STRING
{
   my ( $self ) = @_;

   my ( $parser, $pos ) = @{$self}{qw( parser pos )};
   my ( $linenum, $col, $text ) = $parser->where( $pos );

   # $col only counts characters, and some of them may be tabs. Instead of
   # working out a visual column, reuse the leading part of the text itself
   # as the marker's indentation, with every non-whitespace character
   # replaced by a space.
   my $indent = substr( $text, 0, $col ) =~ s/[^ \t]/ /gr;

   # Only mention the filename if it is a defined, non-reference value
   my $filename = $parser->filename;
   my $in_file = "";
   $in_file = "in $filename " if defined $filename and !ref $filename;

   return join "",
      "$self->{message} ${in_file}on line $linenum at:\n",
      "$text\n",
      "$indent^\n";
}
|
1417
|
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
=head1 EXAMPLES |
|
1419
|
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
=head2 Accumulating Results Using Variables |
|
1421
|
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
Although the structure-forming methods all return a value, obtained from their |
|
1423
|
|
|
|
|
|
|
nested parsing code, it can sometimes be more convenient to use a variable to |
|
1424
|
|
|
|
|
|
|
accumulate a result in instead. For example, consider the following parser |
|
1425
|
|
|
|
|
|
|
method, designed to parse a set of C<name: "value"> assignments, such as might |
|
1426
|
|
|
|
|
|
|
be found in a configuration file, or YAML/JSON-style mapping value. |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
sub parse_dict |
|
1429
|
|
|
|
|
|
|
{ |
|
1430
|
|
|
|
|
|
|
my $self = shift; |
|
1431
|
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
my %ret; |
|
1433
|
|
|
|
|
|
|
$self->list_of( ",", sub { |
|
1434
|
|
|
|
|
|
|
my $key = $self->token_ident; |
|
1435
|
|
|
|
|
|
|
exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" ); |
|
1436
|
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
$self->expect( ":" ); |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
$ret{$key} = $self->parse_value; |
|
1440
|
|
|
|
|
|
|
} ); |
|
1441
|
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
return \%ret |
|
1443
|
|
|
|
|
|
|
} |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
Instead of using the return value from C<list_of>, this method accumulates |
|
1446
|
|
|
|
|
|
|
values in the C<%ret> hash, eventually returning a reference to it as its |
|
1447
|
|
|
|
|
|
|
result. Because of this, it can perform some error checking while it parses; |
|
1448
|
|
|
|
|
|
|
namely, rejecting duplicate keys. |
|
1449
|
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=head1 TODO |
|
1451
|
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
=over 4 |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=item * |
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
Make unescaping of string constants more customisable. Possibly consider |
|
1457
|
|
|
|
|
|
|
instead a C<parse_string_generic> using a loop over C<substring_before>. |
|
1458
|
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
=item * |
|
1460
|
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
Easy ability for subclasses to define more token types as methods. Perhaps |
|
1462
|
|
|
|
|
|
|
provide a class method such as |
|
1463
|
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
__PACKAGE__->has_token( hex => qr/[0-9A-F]+/i, sub { hex $_[1] } ); |
|
1465
|
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
=item * |
|
1467
|
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
Investigate how well C<from_reader> can cope with buffer splitting across |
|
1469
|
|
|
|
|
|
|
other tokens than simply skippable whitespace |
|
1470
|
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
=back |
|
1472
|
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1474
|
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
Paul Evans |
|
1476
|
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
=cut |
|
1478
|
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
0x55AA; |