line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# You may distribute under the terms of either the GNU General Public License |
2
|
|
|
|
|
|
|
# or the Artistic License (the same terms as Perl itself) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# (C) Paul Evans, 2010-2021 -- leonerd@leonerd.org.uk |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package Parser::MGC 0.19; |
7
|
|
|
|
|
|
|
|
8
|
31
|
|
|
31
|
|
1726036
|
use v5.14; |
|
31
|
|
|
|
|
337
|
|
9
|
31
|
|
|
31
|
|
179
|
use warnings; |
|
31
|
|
|
|
|
57
|
|
|
31
|
|
|
|
|
998
|
|
10
|
|
|
|
|
|
|
|
11
|
31
|
|
|
31
|
|
179
|
use Carp; |
|
31
|
|
|
|
|
66
|
|
|
31
|
|
|
|
|
2185
|
|
12
|
31
|
|
|
31
|
|
16410
|
use Feature::Compat::Try; |
|
31
|
|
|
|
|
11601
|
|
|
31
|
|
|
|
|
133
|
|
13
|
|
|
|
|
|
|
|
14
|
31
|
|
|
31
|
|
67687
|
use File::Slurp::Tiny qw( read_file ); |
|
31
|
|
|
|
|
426616
|
|
|
31
|
|
|
|
|
2145
|
|
15
|
31
|
|
|
31
|
|
301
|
use Scalar::Util qw( blessed ); |
|
31
|
|
|
|
|
69
|
|
|
31
|
|
|
|
|
3210
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 NAME |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
C - build simple recursive-descent parsers |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
package My::Grammar::Parser; |
24
|
|
|
|
|
|
|
use base qw( Parser::MGC ); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub parse |
27
|
|
|
|
|
|
|
{ |
28
|
|
|
|
|
|
|
my $self = shift; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
$self->sequence_of( sub { |
31
|
|
|
|
|
|
|
$self->any_of( |
32
|
|
|
|
|
|
|
sub { $self->token_int }, |
33
|
|
|
|
|
|
|
sub { $self->token_string }, |
34
|
|
|
|
|
|
|
sub { \$self->token_ident }, |
35
|
|
|
|
|
|
|
sub { $self->scope_of( "(", \&parse, ")" ) } |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
} ); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $parser = My::Grammar::Parser->new; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
my $tree = $parser->from_file( $ARGV[0] ); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
... |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 DESCRIPTION |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This base class provides a low-level framework for building recursive-descent |
49
|
|
|
|
|
|
|
parsers that consume a given input string from left to right, returning a |
50
|
|
|
|
|
|
|
parse structure. It takes its name from the C regexps used to implement |
51
|
|
|
|
|
|
|
the token parsing behaviour. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
It provides a number of token-parsing methods, which each extract a |
54
|
|
|
|
|
|
|
grammatical token from the string. It also provides wrapping methods that can |
55
|
|
|
|
|
|
|
be used to build up a possibly-recursive grammar structure, by applying a |
56
|
|
|
|
|
|
|
structure around other parts of parsing code. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head2 Backtracking |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
Each method, both token and structural, atomically either consumes a prefix of |
61
|
|
|
|
|
|
|
the string and returns its result, or fails and consumes nothing. This makes |
62
|
|
|
|
|
|
|
it simple to implement grammars that require backtracking. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Several structure-forming methods have some form of "optional" behaviour; they |
65
|
|
|
|
|
|
|
can optionally consume some amount of input or take some particular choice, |
66
|
|
|
|
|
|
|
but if the code invoked inside that subsequently fails, the structure can |
67
|
|
|
|
|
|
|
backtrack and take some different behaviour. This is usually what is required |
68
|
|
|
|
|
|
|
when testing whether the structure of the input string matches some part of |
69
|
|
|
|
|
|
|
the grammar that is optional, or has multiple choices. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
However, once the choice of grammar has been made, it is often useful to be |
72
|
|
|
|
|
|
|
able to fix on that one choice, thus making subsequent failures propagate up |
73
|
|
|
|
|
|
|
rather than taking that alternative behaviour. Control of this backtracking |
74
|
|
|
|
|
|
|
is given by the C method; and careful use of this method is one of the |
75
|
|
|
|
|
|
|
key advantages that C has over more simple parsing using single |
76
|
|
|
|
|
|
|
regexps alone. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=cut |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head2 new |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
$parser = Parser::MGC->new( %args ) |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Returns a new instance of a C object. This must be called on a |
89
|
|
|
|
|
|
|
subclass that provides method of the name provided as C, by default |
90
|
|
|
|
|
|
|
called C. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Takes the following named arguments |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=over 8 |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=item toplevel => STRING |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Name of the toplevel method to use to start the parse from. If not supplied, |
99
|
|
|
|
|
|
|
will try to use a method called C. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item patterns => HASH |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Keys in this hash should map to quoted regexp (C) references, to |
104
|
|
|
|
|
|
|
override the default patterns used to match tokens. See C below |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item accept_0o_oct => BOOL |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
If true, the C method will also accept integers with a C<0o> prefix |
109
|
|
|
|
|
|
|
as octal. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=back |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=cut |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 PATTERNS |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
The following pattern names are recognised. They may be passed to the |
118
|
|
|
|
|
|
|
constructor in the C hash, or provided as a class method under the |
119
|
|
|
|
|
|
|
name C>. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=over 4 |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item * ws |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
Pattern used to skip whitespace between tokens. Defaults to C[\s\n\t]+/> |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=item * comment |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Pattern used to skip comments between tokens. Undefined by default. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=item * int |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Pattern used to parse an integer by C. Defaults to |
134
|
|
|
|
|
|
|
C-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/>. If C is given, then |
135
|
|
|
|
|
|
|
this will be expanded to match C0o[0-7]+/> as well. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=item * float |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
Pattern used to parse a floating-point number by C. Defaults to |
140
|
|
|
|
|
|
|
C-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i>. |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=item * ident |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Pattern used to parse an identifier by C. Defaults to |
145
|
|
|
|
|
|
|
C[[:alpha:]_]\w*/> |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item * string_delim |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Pattern used to delimit a string by C. Defaults to C["']/>. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=back |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
my @patterns = qw( |
156
|
|
|
|
|
|
|
ws |
157
|
|
|
|
|
|
|
comment |
158
|
|
|
|
|
|
|
int |
159
|
|
|
|
|
|
|
float |
160
|
|
|
|
|
|
|
ident |
161
|
|
|
|
|
|
|
string_delim |
162
|
|
|
|
|
|
|
); |
163
|
|
|
|
|
|
|
|
164
|
31
|
|
|
31
|
|
238
|
use constant pattern_ws => qr/[\s\n\t]+/; |
|
31
|
|
|
|
|
73
|
|
|
31
|
|
|
|
|
2494
|
|
165
|
31
|
|
|
31
|
|
203
|
use constant pattern_comment => undef; |
|
31
|
|
|
|
|
66
|
|
|
31
|
|
|
|
|
2529
|
|
166
|
31
|
|
|
31
|
|
223
|
use constant pattern_int => qr/-?(?:0x[[:xdigit:]]+|[[:digit:]]+)/; |
|
31
|
|
|
|
|
52
|
|
|
31
|
|
|
|
|
3351
|
|
167
|
31
|
|
|
31
|
|
229
|
use constant pattern_float => qr/-?(?:\d*\.\d+|\d+\.)(?:e-?\d+)?|-?\d+e-?\d+/i; |
|
31
|
|
|
|
|
224
|
|
|
31
|
|
|
|
|
4125
|
|
168
|
31
|
|
|
31
|
|
232
|
use constant pattern_ident => qr/[[:alpha:]_]\w*/; |
|
31
|
|
|
|
|
67
|
|
|
31
|
|
|
|
|
2003
|
|
169
|
31
|
|
|
31
|
|
189
|
use constant pattern_string_delim => qr/["']/; |
|
31
|
|
|
|
|
57
|
|
|
31
|
|
|
|
|
123267
|
|
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub new |
172
|
|
|
|
|
|
|
{ |
173
|
46
|
|
|
46
|
1
|
2417
|
my $class = shift; |
174
|
46
|
|
|
|
|
146
|
my %args = @_; |
175
|
|
|
|
|
|
|
|
176
|
46
|
|
100
|
|
|
312
|
my $toplevel = $args{toplevel} || "parse"; |
177
|
|
|
|
|
|
|
|
178
|
46
|
50
|
|
|
|
387
|
$class->can( $toplevel ) or |
179
|
|
|
|
|
|
|
croak "Expected to be a subclass that can ->$toplevel"; |
180
|
|
|
|
|
|
|
|
181
|
46
|
|
|
|
|
255
|
my $self = bless { |
182
|
|
|
|
|
|
|
toplevel => $toplevel, |
183
|
|
|
|
|
|
|
patterns => {}, |
184
|
|
|
|
|
|
|
scope_level => 0, |
185
|
|
|
|
|
|
|
}, $class; |
186
|
|
|
|
|
|
|
|
187
|
46
|
|
100
|
|
|
303
|
$self->{patterns}{$_} = $args{patterns}{$_} || $self->${\"pattern_$_"} for @patterns; |
188
|
|
|
|
|
|
|
|
189
|
46
|
100
|
|
|
|
173
|
if( $args{accept_0o_oct} ) { |
190
|
1
|
|
|
|
|
39
|
$self->{patterns}{int} = qr/0o[0-7]+|$self->{patterns}{int}/; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
46
|
100
|
|
|
|
178
|
if( defined $self->{patterns}{comment} ) { |
194
|
1
|
|
|
|
|
42
|
$self->{patterns}{_skip} = qr/$self->{patterns}{ws}|$self->{patterns}{comment}/; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
else { |
197
|
45
|
|
|
|
|
138
|
$self->{patterns}{_skip} = $self->{patterns}{ws}; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
46
|
|
|
|
|
213
|
return $self; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head1 METHODS |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=cut |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 from_string |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
$result = $parser->from_string( $str ) |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Parse the given literal string and return the result from the toplevel method. |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=cut |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub from_string |
216
|
|
|
|
|
|
|
{ |
217
|
160
|
|
|
160
|
1
|
31108
|
my $self = shift; |
218
|
160
|
|
|
|
|
359
|
my ( $str ) = @_; |
219
|
|
|
|
|
|
|
|
220
|
160
|
|
|
|
|
352
|
$self->{str} = $str; |
221
|
|
|
|
|
|
|
|
222
|
160
|
|
|
|
|
439
|
pos $self->{str} = 0; |
223
|
|
|
|
|
|
|
|
224
|
160
|
|
|
|
|
345
|
my $toplevel = $self->{toplevel}; |
225
|
160
|
|
|
|
|
571
|
my $result = $self->$toplevel; |
226
|
|
|
|
|
|
|
|
227
|
143
|
100
|
|
|
|
586
|
$self->at_eos or |
228
|
|
|
|
|
|
|
$self->fail( "Expected end of input" ); |
229
|
|
|
|
|
|
|
|
230
|
138
|
|
|
|
|
766
|
return $result; |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head2 from_file |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
$result = $parser->from_file( $file, %opts ) |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Parse the given file, which may be a pathname in a string, or an opened IO |
238
|
|
|
|
|
|
|
handle, and return the result from the toplevel method. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
The following options are recognised: |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=over 8 |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=item binmode => STRING |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
If set, applies the given binmode to the filehandle before reading. Typically |
247
|
|
|
|
|
|
|
this can be used to set the encoding of the file. |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
$parser->from_file( $file, binmode => ":encoding(UTF-8)" ) |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=back |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub from_file |
256
|
|
|
|
|
|
|
{ |
257
|
3
|
|
|
3
|
1
|
1372
|
my $self = shift; |
258
|
3
|
|
|
|
|
11
|
my ( $file, %opts ) = @_; |
259
|
|
|
|
|
|
|
|
260
|
3
|
50
|
|
|
|
11
|
defined $file or croak "Expected a filename to ->from_file"; |
261
|
|
|
|
|
|
|
|
262
|
3
|
|
|
|
|
7
|
$self->{filename} = $file; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
$self->from_string( ref $file ? |
265
|
2
|
50
|
|
|
|
8
|
do { local $/; binmode $file, $opts{binmode} if $opts{binmode}; <$file> } : |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
78
|
|
266
|
3
|
100
|
|
|
|
21
|
( read_file $file, binmode => $opts{binmode} ) ); |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=head2 from_reader |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
$result = $parser->from_reader( \&reader ) |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
I |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Parse the input which is read by the C function. This function will be |
276
|
|
|
|
|
|
|
called in scalar context to generate portions of string to parse, being passed |
277
|
|
|
|
|
|
|
the C<$parser> object. The function should return C when it has no more |
278
|
|
|
|
|
|
|
string to return. |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
$reader->( $parser ) |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
Note that because it is not generally possible to detect exactly when more |
283
|
|
|
|
|
|
|
input may be required due to failed regexp parsing, the reader function is |
284
|
|
|
|
|
|
|
only invoked during searching for skippable whitespace. This makes it suitable |
285
|
|
|
|
|
|
|
for reading lines of a file in the common case where lines are considered as |
286
|
|
|
|
|
|
|
skippable whitespace, or for reading lines of input interactively from a |
287
|
|
|
|
|
|
|
user. It cannot be used in all cases (for example, reading fixed-size buffers |
288
|
|
|
|
|
|
|
from a file) because two successive invocations may split a single token |
289
|
|
|
|
|
|
|
across the buffer boundaries, and cause parse failures. |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=cut |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub from_reader |
294
|
|
|
|
|
|
|
{ |
295
|
1
|
|
|
1
|
1
|
12
|
my $self = shift; |
296
|
1
|
|
|
|
|
3
|
my ( $reader ) = @_; |
297
|
|
|
|
|
|
|
|
298
|
1
|
|
|
|
|
3
|
local $self->{reader} = $reader; |
299
|
|
|
|
|
|
|
|
300
|
1
|
|
|
|
|
3
|
$self->{str} = ""; |
301
|
1
|
|
|
|
|
4
|
pos $self->{str} = 0; |
302
|
|
|
|
|
|
|
|
303
|
1
|
|
|
|
|
5
|
my $result = $self->parse; |
304
|
|
|
|
|
|
|
|
305
|
1
|
50
|
|
|
|
3
|
$self->at_eos or |
306
|
|
|
|
|
|
|
$self->fail( "Expected end of input" ); |
307
|
|
|
|
|
|
|
|
308
|
1
|
|
|
|
|
8
|
return $result; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 pos |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
$pos = $parser->pos |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
I |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
Returns the current parse position, as a character offset from the beginning |
318
|
|
|
|
|
|
|
of the file or string. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=cut |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub pos |
323
|
|
|
|
|
|
|
{ |
324
|
277
|
|
|
277
|
1
|
379
|
my $self = shift; |
325
|
277
|
|
|
|
|
761
|
return pos $self->{str}; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head2 take |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
$str = $parser->take( $len ) |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
I |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Returns the next C<$len> characters directly from the input, prior to any |
335
|
|
|
|
|
|
|
whitespace or comment skipping. This does I take account of any |
336
|
|
|
|
|
|
|
end-of-scope marker that may be pending. It is intended for use by parsers of |
337
|
|
|
|
|
|
|
partially-binary protocols, or other situations in which it would be incorrect |
338
|
|
|
|
|
|
|
for the end-of-scope marker to take effect at this time. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=cut |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
sub take |
343
|
|
|
|
|
|
|
{ |
344
|
46
|
|
|
46
|
1
|
84
|
my $self = shift; |
345
|
46
|
|
|
|
|
76
|
my ( $len ) = @_; |
346
|
|
|
|
|
|
|
|
347
|
46
|
|
|
|
|
164
|
my $start = pos( $self->{str} ); |
348
|
|
|
|
|
|
|
|
349
|
46
|
|
|
|
|
132
|
pos( $self->{str} ) += $len; |
350
|
|
|
|
|
|
|
|
351
|
46
|
|
|
|
|
274
|
return substr( $self->{str}, $start, $len ); |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 where |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
( $lineno, $col, $text ) = $parser->where |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Returns the current parse position, as a line and column number, and |
359
|
|
|
|
|
|
|
the entire current line of text. The first line is numbered 1, and the first |
360
|
|
|
|
|
|
|
column is numbered 0. |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=cut |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
sub where |
365
|
|
|
|
|
|
|
{ |
366
|
14
|
|
|
14
|
1
|
31
|
my $self = shift; |
367
|
14
|
|
|
|
|
31
|
my ( $pos ) = @_; |
368
|
|
|
|
|
|
|
|
369
|
14
|
100
|
|
|
|
51
|
defined $pos or $pos = pos $self->{str}; |
370
|
|
|
|
|
|
|
|
371
|
14
|
|
|
|
|
32
|
my $str = $self->{str}; |
372
|
|
|
|
|
|
|
|
373
|
14
|
|
|
|
|
24
|
my $sol = $pos; |
374
|
14
|
100
|
100
|
|
|
83
|
$sol-- if $sol > 0 and substr( $str, $sol, 1 ) =~ m/^[\r\n]$/; |
375
|
14
|
|
100
|
|
|
140
|
$sol-- while $sol > 0 and substr( $str, $sol-1, 1 ) !~ m/^[\r\n]$/; |
376
|
|
|
|
|
|
|
|
377
|
14
|
|
|
|
|
24
|
my $eol = $pos; |
378
|
14
|
|
100
|
|
|
170
|
$eol++ while $eol < length($str) and substr( $str, $eol, 1 ) !~ m/^[\r\n]$/; |
379
|
|
|
|
|
|
|
|
380
|
14
|
|
|
|
|
37
|
my $line = substr( $str, $sol, $eol - $sol ); |
381
|
|
|
|
|
|
|
|
382
|
14
|
|
|
|
|
24
|
my $col = $pos - $sol; |
383
|
14
|
|
|
|
|
49
|
my $lineno = ( () = substr( $str, 0, $pos ) =~ m/\n/g ) + 1; |
384
|
|
|
|
|
|
|
|
385
|
14
|
|
|
|
|
68
|
return ( $lineno, $col, $line ); |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 fail |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=head2 fail_from |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
$parser->fail( $message ) |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
$parser->fail_from( $pos, $message ) |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
I since version 0.09.> |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
Aborts the current parse attempt with the given message string. The failure |
399
|
|
|
|
|
|
|
message will include the line and column position, and the line of input that |
400
|
|
|
|
|
|
|
failed at the current parse position (C), or a position earlier obtained |
401
|
|
|
|
|
|
|
using the C method (C). |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
This failure will propagate up to the inner-most structure parsing method that |
404
|
|
|
|
|
|
|
has not been committed; or will cause the entire parser to fail if there are |
405
|
|
|
|
|
|
|
no further options to take. |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=cut |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
sub fail |
410
|
|
|
|
|
|
|
{ |
411
|
271
|
|
|
271
|
1
|
423
|
my $self = shift; |
412
|
271
|
|
|
|
|
471
|
my ( $message ) = @_; |
413
|
271
|
|
|
|
|
649
|
$self->fail_from( $self->pos, $message ); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
sub fail_from |
417
|
|
|
|
|
|
|
{ |
418
|
271
|
|
|
271
|
1
|
356
|
my $self = shift; |
419
|
271
|
|
|
|
|
496
|
my ( $pos, $message ) = @_; |
420
|
271
|
|
|
|
|
659
|
die Parser::MGC::Failure->new( $message, $self, $pos ); |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
253
|
100
|
|
253
|
|
2782
|
sub _isa_failure { blessed $_[0] and $_[0]->isa( "Parser::MGC::Failure" ) } |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 at_eos |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
$eos = $parser->at_eos |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
Returns true if the input string is at the end of the string. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=cut |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
sub at_eos |
434
|
|
|
|
|
|
|
{ |
435
|
596
|
|
|
596
|
1
|
840
|
my $self = shift; |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# Save pos() before skipping ws so we don't break the substring_before method |
438
|
596
|
|
|
|
|
954
|
my $pos = pos $self->{str}; |
439
|
|
|
|
|
|
|
|
440
|
596
|
|
|
|
|
1414
|
$self->skip_ws; |
441
|
|
|
|
|
|
|
|
442
|
596
|
|
|
|
|
830
|
my $at_eos; |
443
|
596
|
100
|
|
|
|
1461
|
if( pos( $self->{str} ) >= length $self->{str} ) { |
|
|
100
|
|
|
|
|
|
444
|
162
|
|
|
|
|
246
|
$at_eos = 1; |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
elsif( defined $self->{endofscope} ) { |
447
|
114
|
|
|
|
|
530
|
$at_eos = $self->{str} =~ m/\G$self->{endofscope}/; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
else { |
450
|
320
|
|
|
|
|
501
|
$at_eos = 0; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
596
|
|
|
|
|
1271
|
pos( $self->{str} ) = $pos; |
454
|
|
|
|
|
|
|
|
455
|
596
|
|
|
|
|
1649
|
return $at_eos; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=head2 scope_level |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
$level = $parser->scope_level |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
I |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
Returns the number of nested C calls that have been made. |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=cut |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
sub scope_level |
469
|
|
|
|
|
|
|
{ |
470
|
5
|
|
|
5
|
1
|
10
|
my $self = shift; |
471
|
5
|
|
|
|
|
45
|
return $self->{scope_level}; |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head1 STRUCTURE-FORMING METHODS |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
The following methods may be used to build a grammatical structure out of the |
477
|
|
|
|
|
|
|
defined basic token-parsing methods. Each takes at least one code reference, |
478
|
|
|
|
|
|
|
which will be passed the actual C<$parser> object as its first argument. |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
Anywhere that a code reference is expected also permits a plain string giving |
481
|
|
|
|
|
|
|
the name of a method to invoke. This is sufficient in many simple cases, such |
482
|
|
|
|
|
|
|
as |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
$self->any_of( |
485
|
|
|
|
|
|
|
'token_int', |
486
|
|
|
|
|
|
|
'token_string', |
487
|
|
|
|
|
|
|
... |
488
|
|
|
|
|
|
|
); |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=cut |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head2 maybe |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
$ret = $parser->maybe( $code ) |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
Attempts to execute the given C<$code> in scalar context, and returns what it |
497
|
|
|
|
|
|
|
returned, accepting that it might fail. C<$code> may either be a CODE |
498
|
|
|
|
|
|
|
reference or a method name given as a string. |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
If the code fails (either by calling C itself, or by propagating a |
501
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C, then |
502
|
|
|
|
|
|
|
none of the input string will be consumed; the current parsing position will |
503
|
|
|
|
|
|
|
be restored. C will be returned in this case. |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
If it calls C then any subsequent failure will be propagated to the |
506
|
|
|
|
|
|
|
caller, rather than returning C. |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
This may be considered to be similar to the C> regexp qualifier. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
sub parse_declaration |
511
|
|
|
|
|
|
|
{ |
512
|
|
|
|
|
|
|
my $self = shift; |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
[ $self->parse_type, |
515
|
|
|
|
|
|
|
$self->token_ident, |
516
|
|
|
|
|
|
|
$self->maybe( sub { |
517
|
|
|
|
|
|
|
$self->expect( "=" ); |
518
|
|
|
|
|
|
|
$self->parse_expression |
519
|
|
|
|
|
|
|
} ), |
520
|
|
|
|
|
|
|
]; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=cut |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
sub maybe |
526
|
|
|
|
|
|
|
{ |
527
|
4
|
|
|
4
|
1
|
39
|
my $self = shift; |
528
|
4
|
|
|
|
|
9
|
my ( $code ) = @_; |
529
|
|
|
|
|
|
|
|
530
|
4
|
|
|
|
|
7
|
my $pos = pos $self->{str}; |
531
|
|
|
|
|
|
|
|
532
|
4
|
|
|
|
|
5
|
my $committed = 0; |
533
|
4
|
|
|
0
|
|
16
|
local $self->{committer} = sub { $committed++ }; |
|
0
|
|
|
|
|
0
|
|
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
try { |
536
|
|
|
|
|
|
|
return $self->$code; |
537
|
|
|
|
|
|
|
} |
538
|
4
|
|
|
|
|
13
|
catch ( $e ) { |
539
|
|
|
|
|
|
|
pos($self->{str}) = $pos; |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
542
|
|
|
|
|
|
|
return undef; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=head2 scope_of |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
$ret = $parser->scope_of( $start, $code, $stop ) |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Expects to find the C<$start> pattern, then attempts to execute the given |
551
|
|
|
|
|
|
|
C<$code>, then expects to find the C<$stop> pattern. Returns whatever the |
552
|
|
|
|
|
|
|
code returned. C<$code> may either be a CODE reference of a method name given |
553
|
|
|
|
|
|
|
as a string. |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
While the code is being executed, the C<$stop> pattern will be used by the |
556
|
|
|
|
|
|
|
token parsing methods as an end-of-scope marker; causing them to raise a |
557
|
|
|
|
|
|
|
failure if called at the end of a scope. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
sub parse_block |
560
|
|
|
|
|
|
|
{ |
561
|
|
|
|
|
|
|
my $self = shift; |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
$self->scope_of( "{", 'parse_statements', "}" ); |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
If the C<$start> pattern is undefined, it is presumed the caller has already |
567
|
|
|
|
|
|
|
checked for this. This is useful when the stop pattern needs to be calculated |
568
|
|
|
|
|
|
|
based on the start pattern. |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
sub parse_bracketed |
571
|
|
|
|
|
|
|
{ |
572
|
|
|
|
|
|
|
my $self = shift; |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
my $delim = $self->expect( qr/[\(\[\<\{]/ ); |
575
|
|
|
|
|
|
|
$delim =~ tr/([<{/)]>}/; |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
$self->scope_of( undef, 'parse_body', $delim ); |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
This method does not have any optional parts to it; any failures are |
581
|
|
|
|
|
|
|
immediately propagated to the caller. |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=cut |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
sub scope_of |
586
|
|
|
|
|
|
|
{ |
587
|
19
|
|
|
19
|
1
|
108
|
my $self = shift; |
588
|
19
|
|
|
|
|
77
|
$self->_scope_of( 0, @_ ); |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
sub _scope_of |
592
|
|
|
|
|
|
|
{ |
593
|
73
|
|
|
73
|
|
102
|
my $self = shift; |
594
|
73
|
|
|
|
|
155
|
my ( $commit_if_started, $start, $code, $stop ) = @_; |
595
|
|
|
|
|
|
|
|
596
|
73
|
50
|
|
|
|
574
|
ref $stop or $stop = qr/\Q$stop/; |
597
|
|
|
|
|
|
|
|
598
|
73
|
100
|
|
|
|
295
|
$self->expect( $start ) if defined $start; |
599
|
|
|
|
|
|
|
|
600
|
34
|
100
|
|
|
|
135
|
$self->commit if $commit_if_started; |
601
|
|
|
|
|
|
|
|
602
|
34
|
|
|
|
|
81
|
local $self->{endofscope} = $stop; |
603
|
34
|
|
|
|
|
82
|
local $self->{scope_level} = $self->{scope_level} + 1; |
604
|
|
|
|
|
|
|
|
605
|
34
|
|
|
|
|
135
|
my $ret = $self->$code; |
606
|
|
|
|
|
|
|
|
607
|
31
|
|
|
|
|
118
|
$self->expect( $stop ); |
608
|
|
|
|
|
|
|
|
609
|
30
|
|
|
|
|
215
|
return $ret; |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=head2 committed_scope_of |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
$ret = $parser->committed_scope_of( $start, $code, $stop ) |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
I |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
A variant of L that calls L after a successful match of |
619
|
|
|
|
|
|
|
the start pattern. This is usually what you want if using C from |
620
|
|
|
|
|
|
|
within an C choice, if no other alternative following this one could |
621
|
|
|
|
|
|
|
possibly match if the start pattern has. |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=cut |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub committed_scope_of |
626
|
|
|
|
|
|
|
{ |
627
|
54
|
|
|
54
|
1
|
95
|
my $self = shift; |
628
|
54
|
|
|
|
|
118
|
$self->_scope_of( 1, @_ ); |
629
|
|
|
|
|
|
|
} |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=head2 list_of |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
$ret = $parser->list_of( $sep, $code ) |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
Expects to find a list of instances of something parsed by C<$code>, |
636
|
|
|
|
|
|
|
separated by the C<$sep> pattern. Returns an ARRAY ref containing a list of |
637
|
|
|
|
|
|
|
the return values from the C<$code>. A single trailing delimiter is allowed, |
638
|
|
|
|
|
|
|
and does not affect the return value. C<$code> may either be a CODE reference |
639
|
|
|
|
|
|
|
or a method name given as a string. It is called in list context, and whatever |
640
|
|
|
|
|
|
|
values it returns are appended to the eventual result - similar to perl's |
641
|
|
|
|
|
|
|
C |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
This method does not consider it an error if the returned list is empty; that |
644
|
|
|
|
|
|
|
is, that the scope ended before any item instances were parsed from it. |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
sub parse_numbers |
647
|
|
|
|
|
|
|
{ |
648
|
|
|
|
|
|
|
my $self = shift; |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
$self->list_of( ",", 'token_int' ); |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
If the code fails (either by invoking C itself, or by propagating a |
654
|
|
|
|
|
|
|
failure from another method it invoked) before it has invoked C on a |
655
|
|
|
|
|
|
|
particular item, then the item is aborted and the parsing position will be |
656
|
|
|
|
|
|
|
restored to the beginning of that failed item. The list of results from |
657
|
|
|
|
|
|
|
previous successful attempts will be returned. |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
If it calls C within an item then any subsequent failure for that item |
660
|
|
|
|
|
|
|
will cause the entire C to fail, propagating that to the caller. |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=cut |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
sub list_of |
665
|
|
|
|
|
|
|
{ |
666
|
72
|
|
|
72
|
1
|
155
|
my $self = shift; |
667
|
72
|
|
|
|
|
129
|
my ( $sep, $code ) = @_; |
668
|
|
|
|
|
|
|
|
669
|
72
|
100
|
33
|
|
|
308
|
ref $sep or $sep = qr/\Q$sep/ if defined $sep; |
670
|
|
|
|
|
|
|
|
671
|
72
|
|
|
|
|
101
|
my $committed; |
672
|
72
|
|
|
14
|
|
277
|
local $self->{committer} = sub { $committed++ }; |
|
14
|
|
|
|
|
31
|
|
673
|
|
|
|
|
|
|
|
674
|
72
|
|
|
|
|
126
|
my @ret; |
675
|
|
|
|
|
|
|
|
676
|
72
|
|
|
|
|
193
|
while( !$self->at_eos ) { |
677
|
127
|
|
|
|
|
182
|
$committed = 0; |
678
|
127
|
|
|
|
|
215
|
my $pos = pos $self->{str}; |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
try { |
681
|
|
|
|
|
|
|
push @ret, $self->$code; |
682
|
|
|
|
|
|
|
next; |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
catch ( $e ) { |
685
|
|
|
|
|
|
|
pos($self->{str}) = $pos; |
686
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
last; |
689
|
|
|
|
|
|
|
} |
690
|
127
|
|
|
|
|
267
|
} |
691
|
|
|
|
|
|
|
continue { |
692
|
103
|
100
|
|
|
|
287
|
if( defined $sep ) { |
693
|
32
|
|
|
|
|
79
|
$self->skip_ws; |
694
|
32
|
100
|
|
|
|
222
|
$self->{str} =~ m/\G$sep/gc or last; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
} |
697
|
|
|
|
|
|
|
|
698
|
71
|
|
|
|
|
331
|
return \@ret; |
699
|
|
|
|
|
|
|
} |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
=head2 sequence_of |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
$ret = $parser->sequence_of( $code ) |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
A shortcut for calling C with an empty string as separator; expects |
706
|
|
|
|
|
|
|
to find at least one instance of something parsed by C<$code>, separated only |
707
|
|
|
|
|
|
|
by skipped whitespace. |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
This may be considered to be similar to the C<+> or C<*> regexp qualifiers. |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
sub parse_statements |
712
|
|
|
|
|
|
|
{ |
713
|
|
|
|
|
|
|
my $self = shift; |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
$self->sequence_of( 'parse_statement' ); |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
The interaction of failures in the code and the C method is identical |
719
|
|
|
|
|
|
|
to that of C. |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=cut |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
sub sequence_of |
724
|
|
|
|
|
|
|
{ |
725
|
56
|
|
|
56
|
1
|
198
|
my $self = shift; |
726
|
56
|
|
|
|
|
129
|
my ( $code ) = @_; |
727
|
|
|
|
|
|
|
|
728
|
56
|
|
|
|
|
188
|
$self->list_of( undef, $code ); |
729
|
|
|
|
|
|
|
} |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=head2 any_of |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
$ret = $parser->any_of( @codes ) |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
I |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Expects that one of the given code instances can parse something from the |
738
|
|
|
|
|
|
|
input, returning what it returned. Each code instance may indicate a failure |
739
|
|
|
|
|
|
|
to parse by calling the C method or otherwise propagating a failure. |
740
|
|
|
|
|
|
|
Each code instance may either be a CODE reference or a method name given as a |
741
|
|
|
|
|
|
|
string. |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
This may be considered to be similar to the C<|> regexp operator for forming |
744
|
|
|
|
|
|
|
alternations of possible parse trees. |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
sub parse_statement |
747
|
|
|
|
|
|
|
{ |
748
|
|
|
|
|
|
|
my $self = shift; |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
$self->any_of( |
751
|
|
|
|
|
|
|
sub { $self->parse_declaration; $self->expect(";") }, |
752
|
|
|
|
|
|
|
sub { $self->parse_expression; $self->expect(";") }, |
753
|
|
|
|
|
|
|
sub { $self->parse_block }, |
754
|
|
|
|
|
|
|
); |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
If the code for a given choice fails (either by invoking C itself, or by |
758
|
|
|
|
|
|
|
propagating a failure from another method it invoked) before it has invoked |
759
|
|
|
|
|
|
|
C itself, then the parsing position restored and the next choice will |
760
|
|
|
|
|
|
|
be attempted. |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
If it calls C then any subsequent failure for that choice will cause |
763
|
|
|
|
|
|
|
the entire C to fail, propagating that to the caller and no further |
764
|
|
|
|
|
|
|
choices will be attempted. |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
If none of the choices match then a simple failure message is printed: |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
Found nothing parseable |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
As this is unlikely to be helpful to users, a better message can be provided |
771
|
|
|
|
|
|
|
by the final choice instead. Don't forget to C before printing the |
772
|
|
|
|
|
|
|
failure message, or it won't count. |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
$self->any_of( |
775
|
|
|
|
|
|
|
'token_int', |
776
|
|
|
|
|
|
|
'token_string', |
777
|
|
|
|
|
|
|
..., |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
sub { $self->commit; $self->fail( "Expected an int or string" ) } |
780
|
|
|
|
|
|
|
); |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=cut |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
sub any_of |
785
|
|
|
|
|
|
|
{ |
786
|
218
|
|
|
218
|
1
|
491
|
my $self = shift; |
787
|
|
|
|
|
|
|
|
788
|
218
|
|
|
|
|
489
|
while( @_ ) { |
789
|
433
|
|
|
|
|
646
|
my $code = shift; |
790
|
433
|
|
|
|
|
685
|
my $pos = pos $self->{str}; |
791
|
|
|
|
|
|
|
|
792
|
433
|
|
|
|
|
546
|
my $committed = 0; |
793
|
433
|
|
|
55
|
|
1355
|
local $self->{committer} = sub { $committed++ }; |
|
55
|
|
|
|
|
110
|
|
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
try { |
796
|
|
|
|
|
|
|
return $self->$code; |
797
|
|
|
|
|
|
|
} |
798
|
433
|
|
|
|
|
862
|
catch ( $e ) { |
799
|
|
|
|
|
|
|
pos( $self->{str} ) = $pos; |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
die $e if $committed or not _isa_failure( $e ); |
802
|
|
|
|
|
|
|
} |
803
|
|
|
|
|
|
|
} |
804
|
|
|
|
|
|
|
|
805
|
12
|
|
|
|
|
30
|
$self->fail( "Found nothing parseable" ); |
806
|
|
|
|
|
|
|
} |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
sub one_of { |
809
|
0
|
|
|
0
|
0
|
0
|
croak "Parser::MGC->one_of is deprecated; use ->any_of instead"; |
810
|
|
|
|
|
|
|
} |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
=head2 commit |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
$parser->commit |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
Calling this method will cancel the backtracking behaviour of the innermost |
817
|
|
|
|
|
|
|
C, C, C, or C structure forming method. |
818
|
|
|
|
|
|
|
That is, if later code then calls C, the exception will be propagated |
819
|
|
|
|
|
|
|
out of C, no further list items will be attempted by C or |
820
|
|
|
|
|
|
|
C, and no further code blocks will be attempted by C. |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Typically this will be called once the grammatical structure alter has been |
823
|
|
|
|
|
|
|
determined, ensuring that any further failures are raised as real exceptions, |
824
|
|
|
|
|
|
|
rather than by attempting other alternatives. |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
sub parse_statement |
827
|
|
|
|
|
|
|
{ |
828
|
|
|
|
|
|
|
my $self = shift; |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
$self->any_of( |
831
|
|
|
|
|
|
|
... |
832
|
|
|
|
|
|
|
sub { |
833
|
|
|
|
|
|
|
$self->scope_of( "{", |
834
|
|
|
|
|
|
|
sub { $self->commit; $self->parse_statements; }, |
835
|
|
|
|
|
|
|
"}" ), |
836
|
|
|
|
|
|
|
}, |
837
|
|
|
|
|
|
|
); |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
Though in this common pattern, L may be used instead. |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
=cut |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
sub commit |
845
|
|
|
|
|
|
|
{ |
846
|
69
|
|
|
69
|
1
|
143
|
my $self = shift; |
847
|
69
|
50
|
|
|
|
159
|
if( $self->{committer} ) { |
848
|
69
|
|
|
|
|
125
|
$self->{committer}->(); |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
else { |
851
|
0
|
|
|
|
|
0
|
croak "Cannot commit except within a backtrack-able structure"; |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
=head1 TOKEN PARSING METHODS |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
The following methods attempt to consume some part of the input string, to be |
858
|
|
|
|
|
|
|
used as part of the parsing process. |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
=cut |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
sub skip_ws |
863
|
|
|
|
|
|
|
{ |
864
|
1283
|
|
|
1283
|
0
|
1737
|
my $self = shift; |
865
|
|
|
|
|
|
|
|
866
|
1283
|
|
|
|
|
1831
|
my $pattern = $self->{patterns}{_skip}; |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
{ |
869
|
1283
|
|
|
|
|
1655
|
1 while $self->{str} =~ m/\G$pattern/gc; |
|
1286
|
|
|
|
|
7202
|
|
870
|
|
|
|
|
|
|
|
871
|
1286
|
100
|
|
|
|
3453
|
return if pos( $self->{str} ) < length $self->{str}; |
872
|
|
|
|
|
|
|
|
873
|
222
|
100
|
|
|
|
614
|
return unless $self->{reader}; |
874
|
|
|
|
|
|
|
|
875
|
4
|
|
|
|
|
10
|
my $more = $self->{reader}->( $self ); |
876
|
4
|
100
|
|
|
|
17
|
if( defined $more ) { |
877
|
3
|
|
|
|
|
5
|
my $pos = pos( $self->{str} ); |
878
|
3
|
|
|
|
|
7
|
$self->{str} .= $more; |
879
|
3
|
|
|
|
|
7
|
pos( $self->{str} ) = $pos; |
880
|
|
|
|
|
|
|
|
881
|
3
|
|
|
|
|
7
|
redo; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
|
884
|
1
|
|
|
|
|
3
|
undef $self->{reader}; |
885
|
1
|
|
|
|
|
2
|
return; |
886
|
|
|
|
|
|
|
} |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
=head2 expect |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
$str = $parser->expect( $literal ) |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
$str = $parser->expect( qr/pattern/ ) |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
@groups = $parser->expect( qr/pattern/ ) |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
Expects to find a literal string or regexp pattern match, and consumes it. |
898
|
|
|
|
|
|
|
In scalar context, this method returns the string that was captured. In list |
899
|
|
|
|
|
|
|
context it returns the matching substring and the contents of any subgroups |
900
|
|
|
|
|
|
|
contained in the pattern. |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
This method will raise a parse error (by calling C) if the regexp fails |
903
|
|
|
|
|
|
|
to match. Note that if the pattern could match an empty string (such as for |
904
|
|
|
|
|
|
|
example C), the pattern will always match, even if it has to match an |
905
|
|
|
|
|
|
|
empty string. This method will not consider a failure if the regexp matches |
906
|
|
|
|
|
|
|
with zero-width. |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
=head2 maybe_expect |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
$str = $parser->maybe_expect( ... ) |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
@groups = $parser->maybe_expect( ... ) |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
I |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
A convenient shortcut equivalent to calling C within C, but |
917
|
|
|
|
|
|
|
implemented more efficiently, avoiding the exception-handling set up by |
918
|
|
|
|
|
|
|
C. Returns C or an empty list if the match fails. |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=cut |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
sub maybe_expect |
923
|
|
|
|
|
|
|
{ |
924
|
371
|
|
|
371
|
1
|
534
|
my $self = shift; |
925
|
371
|
|
|
|
|
615
|
my ( $expect ) = @_; |
926
|
|
|
|
|
|
|
|
927
|
371
|
100
|
|
|
|
801
|
ref $expect or $expect = qr/\Q$expect/; |
928
|
|
|
|
|
|
|
|
929
|
371
|
|
|
|
|
789
|
$self->skip_ws; |
930
|
371
|
100
|
|
|
|
4304
|
$self->{str} =~ m/\G$expect/gc or return; |
931
|
|
|
|
|
|
|
|
932
|
210
|
100
|
|
|
|
1378
|
return substr( $self->{str}, $-[0], $+[0]-$-[0] ) if !wantarray; |
933
|
22
|
100
|
|
|
|
75
|
return map { defined $-[$_] ? substr( $self->{str}, $-[$_], $+[$_]-$-[$_] ) : undef } 0 .. $#+; |
|
37
|
|
|
|
|
269
|
|
934
|
|
|
|
|
|
|
} |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
sub expect |
937
|
|
|
|
|
|
|
{ |
938
|
355
|
|
|
355
|
1
|
1121
|
my $self = shift; |
939
|
355
|
|
|
|
|
614
|
my ( $expect ) = @_; |
940
|
|
|
|
|
|
|
|
941
|
355
|
100
|
|
|
|
2545
|
ref $expect or $expect = qr/\Q$expect/; |
942
|
|
|
|
|
|
|
|
943
|
355
|
100
|
|
|
|
812
|
if( wantarray ) { |
944
|
31
|
100
|
|
|
|
75
|
my @ret = $self->maybe_expect( $expect ) or |
945
|
|
|
|
|
|
|
$self->fail( "Expected $expect" ); |
946
|
20
|
|
|
|
|
98
|
return @ret; |
947
|
|
|
|
|
|
|
} |
948
|
|
|
|
|
|
|
else { |
949
|
324
|
100
|
|
|
|
658
|
defined( my $ret = $self->maybe_expect( $expect ) ) or |
950
|
|
|
|
|
|
|
$self->fail( "Expected $expect" ); |
951
|
185
|
|
|
|
|
595
|
return $ret; |
952
|
|
|
|
|
|
|
} |
953
|
|
|
|
|
|
|
} |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
=head2 substring_before |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
$str = $parser->substring_before( $literal ) |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
$str = $parser->substring_before( qr/pattern/ ) |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
I |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
Expects to possibly find a literal string or regexp pattern match. If it finds |
964
|
|
|
|
|
|
|
such, consume all the input text before but excluding this match, and return |
965
|
|
|
|
|
|
|
it. If it fails to find a match before the end of the current scope, consumes |
966
|
|
|
|
|
|
|
all the input text until the end of scope and return it. |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
This method does not consume the part of input that matches, only the text |
969
|
|
|
|
|
|
|
before it. It is not considered a failure if the substring before this match |
970
|
|
|
|
|
|
|
is empty. If a non-empty match is required, use the C method: |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
sub token_nonempty_part |
973
|
|
|
|
|
|
|
{ |
974
|
|
|
|
|
|
|
my $self = shift; |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
my $str = $parser->substring_before( "," ); |
977
|
|
|
|
|
|
|
length $str or $self->fail( "Expected a string fragment before ," ); |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
return $str; |
980
|
|
|
|
|
|
|
} |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
Note that unlike most of the other token parsing methods, this method does not |
983
|
|
|
|
|
|
|
consume either leading or trailing whitespace around the substring. It is |
984
|
|
|
|
|
|
|
expected that this method would be used as part a parser to read quoted |
985
|
|
|
|
|
|
|
strings, or similar cases where whitespace should be preserved. |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
=cut |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
sub substring_before |
990
|
|
|
|
|
|
|
{ |
991
|
40
|
|
|
40
|
1
|
71
|
my $self = shift; |
992
|
40
|
|
|
|
|
68
|
my ( $expect ) = @_; |
993
|
|
|
|
|
|
|
|
994
|
40
|
100
|
|
|
|
182
|
ref $expect or $expect = qr/\Q$expect/; |
995
|
|
|
|
|
|
|
|
996
|
40
|
100
|
|
|
|
186
|
my $endre = ( defined $self->{endofscope} ) ? |
997
|
|
|
|
|
|
|
qr/$expect|$self->{endofscope}/ : |
998
|
|
|
|
|
|
|
$expect; |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
# NO skip_ws |
1001
|
|
|
|
|
|
|
|
1002
|
40
|
|
|
|
|
123
|
my $start = pos $self->{str}; |
1003
|
40
|
|
|
|
|
54
|
my $end; |
1004
|
40
|
100
|
|
|
|
522
|
if( $self->{str} =~ m/\G(?s:.*?)($endre)/ ) { |
1005
|
37
|
|
|
|
|
153
|
$end = $-[1]; |
1006
|
|
|
|
|
|
|
} |
1007
|
|
|
|
|
|
|
else { |
1008
|
3
|
|
|
|
|
9
|
$end = length $self->{str}; |
1009
|
|
|
|
|
|
|
} |
1010
|
|
|
|
|
|
|
|
1011
|
40
|
|
|
|
|
137
|
return $self->take( $end - $start ); |
1012
|
|
|
|
|
|
|
} |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
=head2 generic_token |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
$val = $parser->generic_token( $name, $re, $convert ) |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
I |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
Expects to find a token matching the precompiled regexp C<$re>. If provided, |
1021
|
|
|
|
|
|
|
the C<$convert> CODE reference can be used to convert the string into a more |
1022
|
|
|
|
|
|
|
convenient form. C<$name> is used in the failure message if the pattern fails |
1023
|
|
|
|
|
|
|
to match. |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
If provided, the C<$convert> function will be passed the parser and the |
1026
|
|
|
|
|
|
|
matching substring; the value it returns is returned from C. |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
$convert->( $parser, $substr ) |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
If not provided, the substring will be returned as it stands. |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
This method is mostly provided for subclasses to define their own token types. |
1033
|
|
|
|
|
|
|
For example: |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
sub token_hex |
1036
|
|
|
|
|
|
|
{ |
1037
|
|
|
|
|
|
|
my $self = shift; |
1038
|
|
|
|
|
|
|
$self->generic_token( hex => qr/[0-9A-F]{2}h/, sub { hex $_[1] } ); |
1039
|
|
|
|
|
|
|
} |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
=cut |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
sub generic_token |
1044
|
|
|
|
|
|
|
{ |
1045
|
230
|
|
|
230
|
1
|
348
|
my $self = shift; |
1046
|
230
|
|
|
|
|
401
|
my ( $name, $re, $convert ) = @_; |
1047
|
|
|
|
|
|
|
|
1048
|
230
|
50
|
|
|
|
483
|
$self->fail( "Expected $name" ) if $self->at_eos; |
1049
|
|
|
|
|
|
|
|
1050
|
230
|
|
|
|
|
549
|
$self->skip_ws; |
1051
|
230
|
100
|
|
|
|
4273
|
$self->{str} =~ m/\G$re/gc or |
1052
|
|
|
|
|
|
|
$self->fail( "Expected $name" ); |
1053
|
|
|
|
|
|
|
|
1054
|
169
|
|
|
|
|
945
|
my $match = substr( $self->{str}, $-[0], $+[0] - $-[0] ); |
1055
|
|
|
|
|
|
|
|
1056
|
169
|
100
|
|
|
|
688
|
return $convert ? $convert->( $self, $match ) : $match; |
1057
|
|
|
|
|
|
|
} |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
sub _token_generic |
1060
|
|
|
|
|
|
|
{ |
1061
|
226
|
|
|
226
|
|
346
|
my $self = shift; |
1062
|
226
|
|
|
|
|
1045
|
my %args = @_; |
1063
|
|
|
|
|
|
|
|
1064
|
226
|
|
|
|
|
407
|
my $name = $args{name}; |
1065
|
226
|
50
|
|
|
|
573
|
my $re = $args{pattern} ? $self->{patterns}{ $args{pattern} } : $args{re}; |
1066
|
226
|
|
|
|
|
319
|
my $convert = $args{convert}; |
1067
|
|
|
|
|
|
|
|
1068
|
226
|
|
|
|
|
523
|
$self->generic_token( $name, $re, $convert ); |
1069
|
|
|
|
|
|
|
} |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
=head2 token_int |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
$int = $parser->token_int |
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
Expects to find an integer in decimal, octal or hexadecimal notation, and |
1076
|
|
|
|
|
|
|
consumes it. Negative integers, preceeded by C<->, are also recognised. |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
=cut |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
sub token_int |
1081
|
|
|
|
|
|
|
{ |
1082
|
134
|
|
|
134
|
1
|
345
|
my $self = shift; |
1083
|
|
|
|
|
|
|
$self->_token_generic( |
1084
|
|
|
|
|
|
|
name => "int", |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
pattern => "int", |
1087
|
|
|
|
|
|
|
convert => sub { |
1088
|
104
|
|
|
104
|
|
195
|
my $int = $_[1]; |
1089
|
104
|
100
|
|
|
|
299
|
my $sign = ( $int =~ s/^-// ) ? -1 : 1; |
1090
|
|
|
|
|
|
|
|
1091
|
104
|
|
|
|
|
166
|
$int =~ s/^0o/0/; |
1092
|
|
|
|
|
|
|
|
1093
|
104
|
100
|
|
|
|
326
|
return $sign * oct $int if $int =~ m/^0/; |
1094
|
99
|
|
|
|
|
834
|
return $sign * $int; |
1095
|
|
|
|
|
|
|
}, |
1096
|
134
|
|
|
|
|
596
|
); |
1097
|
|
|
|
|
|
|
} |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
=head2 token_float |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
$float = $parser->token_float |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
I |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
Expects to find a number expressed in floating-point notation; a sequence of |
1106
|
|
|
|
|
|
|
digits possibly prefixed by C<->, possibly containing a decimal point, |
1107
|
|
|
|
|
|
|
possibly followed by an exponent specified by C followed by an integer. The |
1108
|
|
|
|
|
|
|
numerical value is then returned. |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
=cut |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
sub token_float |
1113
|
|
|
|
|
|
|
{ |
1114
|
20
|
|
|
20
|
1
|
68
|
my $self = shift; |
1115
|
|
|
|
|
|
|
$self->_token_generic( |
1116
|
|
|
|
|
|
|
name => "float", |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
pattern => "float", |
1119
|
18
|
|
|
18
|
|
135
|
convert => sub { $_[1] + 0 }, |
1120
|
20
|
|
|
|
|
82
|
); |
1121
|
|
|
|
|
|
|
} |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
=head2 token_number |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
$number = $parser->token_number |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
I |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
Expects to find a number expressed in either of the above forms. |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
=cut |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
sub token_number |
1134
|
|
|
|
|
|
|
{ |
1135
|
7
|
|
|
7
|
1
|
34
|
my $self = shift; |
1136
|
7
|
|
|
|
|
26
|
$self->any_of( \&token_float, \&token_int ); |
1137
|
|
|
|
|
|
|
} |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
=head2 token_string |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
$str = $parser->token_string |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
Expects to find a quoted string, and consumes it. The string should be quoted |
1144
|
|
|
|
|
|
|
using C<"> or C<'> quote marks. |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
The content of the quoted string can contain character escapes similar to |
1147
|
|
|
|
|
|
|
those accepted by C or Perl. Specifically, the following forms are recognised: |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
\a Bell ("alert") |
1150
|
|
|
|
|
|
|
\b Backspace |
1151
|
|
|
|
|
|
|
\e Escape |
1152
|
|
|
|
|
|
|
\f Form feed |
1153
|
|
|
|
|
|
|
\n Newline |
1154
|
|
|
|
|
|
|
\r Return |
1155
|
|
|
|
|
|
|
\t Horizontal Tab |
1156
|
|
|
|
|
|
|
\0, \012 Octal character |
1157
|
|
|
|
|
|
|
\x34, \x{5678} Hexadecimal character |
1158
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
C's C<\v> for vertical tab is not supported as it is rarely used in practice |
1160
|
|
|
|
|
|
|
and it collides with Perl's C<\v> regexp escape. Perl's C<\c> for forming other |
1161
|
|
|
|
|
|
|
control characters is also not supported. |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
=cut |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
my %escapes = ( |
1166
|
|
|
|
|
|
|
a => "\a", |
1167
|
|
|
|
|
|
|
b => "\b", |
1168
|
|
|
|
|
|
|
e => "\e", |
1169
|
|
|
|
|
|
|
f => "\f", |
1170
|
|
|
|
|
|
|
n => "\n", |
1171
|
|
|
|
|
|
|
r => "\r", |
1172
|
|
|
|
|
|
|
t => "\t", |
1173
|
|
|
|
|
|
|
); |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
sub token_string |
1176
|
|
|
|
|
|
|
{ |
1177
|
53
|
|
|
53
|
1
|
252
|
my $self = shift; |
1178
|
|
|
|
|
|
|
|
1179
|
53
|
100
|
|
|
|
126
|
$self->fail( "Expected string" ) if $self->at_eos; |
1180
|
|
|
|
|
|
|
|
1181
|
52
|
|
|
|
|
144
|
my $pos = pos $self->{str}; |
1182
|
|
|
|
|
|
|
|
1183
|
52
|
|
|
|
|
131
|
$self->skip_ws; |
1184
|
52
|
100
|
|
|
|
448
|
$self->{str} =~ m/\G($self->{patterns}{string_delim})/gc or |
1185
|
|
|
|
|
|
|
$self->fail( "Expected string delimiter" ); |
1186
|
|
|
|
|
|
|
|
1187
|
32
|
|
|
|
|
94
|
my $delim = $1; |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
$self->{str} =~ m/ |
1190
|
|
|
|
|
|
|
\G( |
1191
|
|
|
|
|
|
|
(?: |
1192
|
|
|
|
|
|
|
\\[0-7]{1,3} # octal escape |
1193
|
|
|
|
|
|
|
|\\x[0-9A-F]{2} # 2-digit hex escape |
1194
|
|
|
|
|
|
|
|\\x\{[0-9A-F]+\} # {}-delimited hex escape |
1195
|
|
|
|
|
|
|
|\\. # symbolic escape |
1196
|
|
|
|
|
|
|
|[^\\$delim]+ # plain chunk |
1197
|
|
|
|
|
|
|
)*? |
1198
|
|
|
|
|
|
|
)$delim/gcix or |
1199
|
32
|
50
|
|
|
|
1033
|
pos($self->{str}) = $pos, $self->fail( "Expected contents of string" ); |
1200
|
|
|
|
|
|
|
|
1201
|
32
|
|
|
|
|
116
|
my $string = $1; |
1202
|
|
|
|
|
|
|
|
1203
|
32
|
|
|
|
|
98
|
$string =~ s<\\(?:([0-7]{1,3})|x([0-9A-F]{2})|x\{([0-9A-F]+)\}|(.))> |
1204
|
|
|
|
|
|
|
[defined $1 ? chr oct $1 : |
1205
|
|
|
|
|
|
|
defined $2 ? chr hex $2 : |
1206
|
11
|
50
|
|
|
|
77
|
defined $3 ? chr hex $3 : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
exists $escapes{$4} ? $escapes{$4} : $4]egi; |
1208
|
32
|
|
|
|
|
126
|
|
1209
|
|
|
|
|
|
|
return $string; |
1210
|
|
|
|
|
|
|
} |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
=head2 token_ident |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
$ident = $parser->token_ident |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
Expects to find an identifier, and consumes it. |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=cut |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
sub token_ident |
1221
|
72
|
|
|
72
|
1
|
145
|
{ |
1222
|
72
|
|
|
|
|
229
|
my $self = shift; |
1223
|
|
|
|
|
|
|
$self->_token_generic( |
1224
|
|
|
|
|
|
|
name => "ident", |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
pattern => "ident", |
1227
|
|
|
|
|
|
|
); |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
=head2 token_kw |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
$keyword = $parser->token_kw( @keywords ) |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
Expects to find a keyword, and consumes it. A keyword is defined as an |
1235
|
|
|
|
|
|
|
identifier which is exactly one of the literal values passed in. |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=cut |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub token_kw |
1240
|
2
|
|
|
2
|
1
|
14
|
{ |
1241
|
2
|
|
|
|
|
5
|
my $self = shift; |
1242
|
|
|
|
|
|
|
my @acceptable = @_; |
1243
|
2
|
|
|
|
|
9
|
|
1244
|
|
|
|
|
|
|
$self->skip_ws; |
1245
|
2
|
|
|
|
|
5
|
|
1246
|
|
|
|
|
|
|
my $pos = pos $self->{str}; |
1247
|
2
|
50
|
|
|
|
10
|
|
1248
|
|
|
|
|
|
|
defined( my $kw = $self->token_ident ) or |
1249
|
|
|
|
|
|
|
return undef; |
1250
|
4
|
|
|
|
|
22
|
|
1251
|
2
|
100
|
|
|
|
6
|
grep { $_ eq $kw } @acceptable or |
1252
|
|
|
|
|
|
|
pos($self->{str}) = $pos, $self->fail( "Expected any of ".join( ", ", @acceptable ) ); |
1253
|
1
|
|
|
|
|
4
|
|
1254
|
|
|
|
|
|
|
return $kw; |
1255
|
|
|
|
|
|
|
} |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
package # hide from indexer |
1258
|
|
|
|
|
|
|
Parser::MGC::Failure; |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
sub new |
1261
|
271
|
|
|
271
|
|
376
|
{ |
1262
|
271
|
|
|
|
|
535
|
my $class = shift; |
1263
|
271
|
|
|
|
|
501
|
my $self = bless {}, $class; |
|
271
|
|
|
|
|
1402
|
|
1264
|
271
|
|
|
|
|
1401
|
@{$self}{qw( message parser pos )} = @_; |
1265
|
|
|
|
|
|
|
return $self; |
1266
|
|
|
|
|
|
|
} |
1267
|
31
|
|
|
31
|
|
31085
|
|
|
31
|
|
|
|
|
23988
|
|
|
31
|
|
|
|
|
255
|
|
1268
|
|
|
|
|
|
|
use overload '""' => "STRING"; |
1269
|
|
|
|
|
|
|
sub STRING |
1270
|
8
|
|
|
8
|
|
2112
|
{ |
1271
|
|
|
|
|
|
|
my $self = shift; |
1272
|
8
|
|
|
|
|
23
|
|
1273
|
8
|
|
|
|
|
54
|
my $parser = $self->{parser}; |
1274
|
|
|
|
|
|
|
my ( $linenum, $col, $text ) = $parser->where( $self->{pos} ); |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
# Column number only counts characters. There may be tabs in there. |
1277
|
|
|
|
|
|
|
# Rather than trying to calculate the visual column number, just print the |
1278
|
|
|
|
|
|
|
# indentation as it stands. |
1279
|
8
|
|
|
|
|
19
|
|
1280
|
8
|
|
|
|
|
37
|
my $indent = substr( $text, 0, $col ); |
1281
|
|
|
|
|
|
|
$indent =~ s/[^ \t]/ /g; # blank out all the non-whitespace |
1282
|
8
|
|
|
|
|
19
|
|
1283
|
8
|
100
|
100
|
|
|
52
|
my $filename = $parser->{filename}; |
1284
|
|
|
|
|
|
|
my $in_file = ( defined $filename and !ref $filename ) |
1285
|
|
|
|
|
|
|
? "in $filename " : ""; |
1286
|
8
|
|
|
|
|
157
|
|
1287
|
|
|
|
|
|
|
return "$self->{message} ${in_file}on line $linenum at:\n" . |
1288
|
|
|
|
|
|
|
"$text\n" . |
1289
|
|
|
|
|
|
|
"$indent^\n"; |
1290
|
|
|
|
|
|
|
} |
1291
|
|
|
|
|
|
|
|
1292
|
31
|
|
|
31
|
|
7825
|
# Provide fallback operators for cmp, eq, etc... |
|
31
|
|
|
|
|
71
|
|
|
31
|
|
|
|
|
156
|
|
1293
|
|
|
|
|
|
|
use overload fallback => 1; |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=head1 EXAMPLES |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
=head2 Accumulating Results Using Variables |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
Although the structure-forming methods all return a value, obtained from their |
1300
|
|
|
|
|
|
|
nested parsing code, it can sometimes be more convenient to use a variable to |
1301
|
|
|
|
|
|
|
accumulate a result in instead. For example, consider the following parser |
1302
|
|
|
|
|
|
|
method, designed to parse a set of C assignments, such as might |
1303
|
|
|
|
|
|
|
be found in a configuration file, or YAML/JSON-style mapping value. |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
sub parse_dict |
1306
|
|
|
|
|
|
|
{ |
1307
|
|
|
|
|
|
|
my $self = shift; |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
my %ret; |
1310
|
|
|
|
|
|
|
$self->list_of( ",", sub { |
1311
|
|
|
|
|
|
|
my $key = $self->token_ident; |
1312
|
|
|
|
|
|
|
exists $ret{$key} and $self->fail( "Already have a mapping for '$key'" ); |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
$self->expect( ":" ); |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
$ret{$key} = $self->parse_value; |
1317
|
|
|
|
|
|
|
} ); |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
return \%ret |
1320
|
|
|
|
|
|
|
} |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
Instead of using the return value from C, this method accumulates |
1323
|
|
|
|
|
|
|
values in the C<%ret> hash, eventually returning a reference to it as its |
1324
|
|
|
|
|
|
|
result. Because of this, it can perform some error checking while it parses; |
1325
|
|
|
|
|
|
|
namely, rejecting duplicate keys. |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
=head1 TODO |
1328
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
=over 4 |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
=item * |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
Make unescaping of string constants more customisable. Possibly consider |
1334
|
|
|
|
|
|
|
instead a C using a loop over C. |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=item * |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
Easy ability for subclasses to define more token types as methods. Perhaps |
1339
|
|
|
|
|
|
|
provide a class method such as |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
__PACKAGE__->has_token( hex => qr/[0-9A-F]+/i, sub { hex $_[1] } ); |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
=item * |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
Investigate how well C can cope with buffer splitting across |
1346
|
|
|
|
|
|
|
other tokens than simply skippable whitespace |
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=back |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
=head1 AUTHOR |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
Paul Evans |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
=cut |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
0x55AA; |