File Coverage

blib/lib/Language/FormulaEngine/Parser.pm

Criterion	Covered	Total	%
statement	226	234	96.5
branch	76	88	86.3
condition	27	35	77.1
subroutine	56	58	96.5
pod	33	33	100.0
total	418	448	93.3

line	stmt	bran	cond	sub	pod	time	code
1							package Language::FormulaEngine::Parser;
2	7			7		456462	use Moo;
	7					24126
	7					55
3	7			7		4897	use Carp;
	7					38
	7					456
4	7			7		1090	use Try::Tiny;
	7					2713
	7					442
5	7			7		52	use List::Util qw( min max );
	7					26
	7					549
6							use Language::FormulaEngine::Parser::ContextUtil
7	7			7		3559	qw( calc_text_coordinates format_context_string format_context_multiline );
	7					22
	7					474
8	7			7		1598	use namespace::clean;
	7					30123
	7					67
9
10							# ABSTRACT: Create parse tree from an input string
11							our $VERSION = '0.06'; # VERSION
12
13
14							has parse_tree => ( is => 'rw' );
15							has error => ( is => 'rw' );
16							has functions => ( is => 'rw' );
17							has symbols => ( is => 'rw' );
18
19							sub parse {
20	273			273	1	46962	my ($self, $input)= @_;
21	273					819	$self->reset;
22	273					560	$self->{input}= $input;
23	273					834	pos( $self->{input} )= 0;
24							try {
25	273			273		11808	$self->next_token;
26	272					724	my $tree= $self->parse_expr;
27							# It is an error if there was un-processed input.
28	271	100				610	$self->token_type eq '0'
29							or die sprintf('Unexpected %s "%s" near %s',
30							$self->token_type, $self->token_value, $self->token_context);
31	270					944	$self->parse_tree($tree);
32							} catch {
33	3			3		48	chomp;
34	3					17	$self->error($_);
35	273					1874	};
36	273					5239	return $self->parse_tree;
37							}
38
39							sub reset {
40	273			273	1	478	my $self= shift;
41	273					1262	$self->parse_tree(undef);
42	273					673	$self->error(undef);
43	273					766	$self->functions({});
44	273					664	$self->symbols({});
45	273					485	delete @{$self}{'input','token_type','token_value','token_pos'};
	273					1010
46	273					486	$self;
47							}
48
49
50							sub deparse {
51	39			39	1	3641	my ($self, $node)= @_;
52	39	100				92	$node= $self->parse_tree unless @_ > 1;
53	39					83	$node->deparse($self);
54							}
55
56
57	0			0	1	0	sub input { shift->{input} }
58	0			0	1	0	sub input_pos { pos( shift->{input} ) }
59	328			328	1	1110	sub token_type { shift->{token_type} }
60	47			47	1	213	sub token_value { shift->{token_value} }
61	46			46	1	108	sub token_pos { shift->{token_pos} }
62
63
64							sub next_token {
65	2015			2015	1	49032	my $self= shift;
66
67							# If already reached end of input, throw an exception.
68							die "Can't call next_token after end of input"
69	2015	50	100			5664	if '0' eq ($self->{token_type}||'');
70
71							# Detect the next token
72	2015					4440	my ($type, $val, $pos0, $pos1)= ('','');
73	2015					4218	while ($type eq '') {
74	2222		100			4779	$pos0= pos($self->{input}) || 0;
75	2222					55304	($type, $val)= $self->scan_token;
76	2222		100			11609	$pos1= pos($self->{input}) || 0;
77							# Check for end of buffer, even if it matched.
78	2222	100				4743	if ($pos1 >= length $self->{input}) {
79							#pos($self->{input})= $pos0; # rewind to start of token before growing buffer
80							#if ($self->_grow_buffer) {
81							# $log->trace("grow buffer succeeded");
82							# $type= '';
83							# next;
84							#}
85							#pos($self->{input})= $pos1; # restore actual position\
86							# If we didn't get a token or are ignoring this final token, then return the EOF token
87	557	100	100			1802	if (!defined $type || $type eq '') {
88	281					468	$type= 0;
89	281					445	$val= '';
90	281					411	$pos0= $pos1;
91	281					504	last;
92							}
93							}
94	1941	100				3812	defined $type
95							or die "Unknown syntax at ".$self->token_context."\n";
96	1940	50				4835	$pos1 > $pos0
97							or croak "Tokenizer consumed zero characters";
98							}
99	2014					3234	@{$self}{'token_type','token_value','token_pos'}= ($type,$val,$pos0);
	2014					4561
100	2014					3767	return $type, $val;
101							}
102
103
104							sub consume_token {
105	937			937	1	18832	my $self= shift;
106							croak "Can't consume EOF"
107	937	100				3093	if $self->{token_type} eq '0';
108	927					1879	my $val= $self->{token_value};
109	927					2072	$self->next_token;
110	927					2302	return $val;
111							}
112
113							sub token_context {
114	2			2	1	6	my ($self, %args)= @_;
115							return format_context_multiline($self->{input}, $self->{token_pos}||0, pos($self->{input})||0, \%args)
116	2	50	0			6	if delete $args{multiline};
			0
117	2		100			20	return format_context_string($self->{input}, $self->{token_pos}||0, pos($self->{input})||0);
			100
118							}
119
120
121	723			723	1	1477	sub parse_expr { shift->parse_or_expr; }
122
123							sub parse_or_expr {
124	723			723	1	1113	my $self= shift;
125	723					1396	my $first= $self->parse_and_expr;
126	722	50				2054	return $first unless $self->{token_type} eq 'or';
127	0					0	my @or_expr= $first;
128	0					0	while ($self->{token_type} eq 'or') {
129	0					0	$self->next_token;
130	0					0	push @or_expr, $self->parse_and_expr;
131							}
132	0					0	return $self->new_call('or', \@or_expr);
133							}
134
135							sub parse_and_expr {
136	723			723	1	1132	my $self= shift;
137	723					1485	my $first= $self->parse_not_expr;
138	722	100				1839	return $first unless $self->{token_type} eq 'and';
139	7					19	my @and_expr= $first;
140	7					35	while ($self->{token_type} eq 'and') {
141	7					21	$self->next_token;
142	7					18	push @and_expr, $self->parse_not_expr;
143							}
144	7					20	return $self->new_call('and', \@and_expr);
145							}
146
147							sub parse_not_expr {
148	730			730	1	1120	my $self= shift;
149	730	100	66			2886	if ($self->{token_type} eq 'not' or $self->{token_type} eq '!') {
150	5					14	$self->next_token;
151	5					15	return $self->new_call('not', [ $self->parse_cmp_expr ]);
152							}
153	725					1511	return $self->parse_cmp_expr;
154							}
155
156							my %_cmp_ops= map { $_ => 1 } qw( > < >= <= != == );
157							sub parse_cmp_expr {
158	730			730	1	1064	my $self= shift;
159	730					1485	my $first= $self->parse_sum_expr;
160	729	100				2066	return $first unless $_cmp_ops{$self->{token_type}};
161	23					88	my @expr= $first;
162	23					68	while ($_cmp_ops{$self->{token_type}}) {
163	31					76	push @expr, $self->new_string($self->{token_type});
164	31					86	$self->next_token;
165	31					77	push @expr, $self->parse_sum_expr;
166							}
167	23					69	return $self->new_call('compare', \@expr);
168							}
169
170							sub parse_sum_expr {
171	761			761	1	1203	my $self= shift;
172	761					1508	my $first= $self->parse_prod_expr;
173	760	100	100			2722	return $first unless $self->{token_type} eq '+' or $self->{token_type} eq '-';
174	24					63	my @sum_expr= $first;
175	24		100			80	while ($self->{token_type} eq '+' or $self->{token_type} eq '-') {
176	31					69	my $negate= $self->consume_token eq '-';
177	31					82	my $operand= $self->parse_prod_expr;
178	31	100				166	push @sum_expr, $negate? $self->get_negative($operand) : $operand;
179							}
180	24					67	return $self->new_call('sum', \@sum_expr);
181							}
182
183							sub parse_prod_expr {
184	792			792	1	1224	my $self= shift;
185	792					1472	my $value= $self->parse_unit_expr;
186	791		100			3380	while ($self->{token_type} eq '*' or $self->{token_type} eq '/') {
187	35					79	my $op= $self->consume_token;
188	35					87	my $right= $self->parse_unit_expr;
189	35	100				164	$value= $self->new_call( $op eq '*'? 'mul' : 'div', [ $value, $right ] );
190							}
191	791					1347	return $value;
192							}
193
194							sub parse_unit_expr {
195	857			857	1	1271	my $self= shift;
196	857					1236	my $negate= 0;
197	857					1170	my $expr;
198
199	857	100				1820	if ($self->{token_type} eq '-') {
200	30					78	$self->next_token;
201	30					87	return $self->get_negative($self->parse_unit_expr);
202							}
203
204	827	100				1616	if ($self->{token_type} eq '(') {
205	11					32	$self->next_token;
206	11					38	my $args= $self->parse_list;
207							die "Expected ')' near ".$self->token_context."\n"
208	11	50				32	if $self->{token_type} ne ')';
209	11					32	$self->next_token;
210	11	100				44	return @$args > 1? $self->new_call('list', $args) : $args->[0];
211							}
212
213	816	100				1661	if ($self->{token_type} eq 'Number') {
214	317					652	return $self->new_number($self->consume_token);
215							}
216
217	499	100				968	if ($self->{token_type} eq 'String') {
218	84					179	return $self->new_string($self->consume_token);
219							}
220
221	415	100				883	if ($self->{token_type} eq 'Identifier') {
222	414					809	my $id= $self->consume_token;
223	414	100				998	if ($self->{token_type} eq '(') {
224	261					684	$self->next_token;
225	261	100				848	my $args= $self->{token_type} eq ')'? [] : $self->parse_list;
226							die "Expected ')' near ".$self->token_context."\n"
227	261	50				593	if $self->{token_type} ne ')';
228	261					640	$self->next_token;
229	261					676	return $self->new_call($id, $args);
230							}
231							else {
232	153					392	return $self->new_symbol($id);
233							}
234							}
235
236	1	50				11	if ($self->{token_type} eq '0') {
237	1					9	die "Expected expression component near (end of input)";
238							}
239
240	0					0	die "Unexpected token $self->{token_type} '$self->{token_value}' near ".$self->token_context."\n";
241							}
242
243							sub parse_list {
244	263			263	1	447	my $self= shift;
245	263					621	my @args= $self->parse_expr;
246	263					659	while ($self->{token_type} eq ',') {
247	188					511	$self->next_token;
248	188					482	push @args, $self->parse_expr;
249							}
250	263					595	return \@args;
251							}
252
253
254	6			6	1	35	sub cmp_operators { qw( = == != <> > >= < <= ), "\x{2260}", "\x{2264}", "\x{2265}" }
255	6			6	1	82	sub math_operators { qw( + - * / ) }
256	6			6	1	22	sub logic_operators { qw( and or not ! ) }
257	6			6	1	19	sub list_operators { ',', '(', ')' }
258							sub keyword_map {
259							return {
260	6			6	1	31	(map { $_ => $_ } cmp_operators, math_operators, logic_operators, list_operators),
	132					353
261							'=' => '==', '<>' => '!=', "\x{2260}" => '!=',
262							"\x{2264}" => '<=', "\x{2265}" => '>='
263							}
264							}
265							sub scanner_rules {
266	6			6	1	15	my $self= shift;
267	6					16	my $keywords= $self->keyword_map;
268							my $kw_regex= join '\|', map "\Q$_\E",
269	6					70	sort { length($b) <=> length($a) } # longest keywords get priority
	389					623
270							keys %$keywords;
271
272							# Perl 5.20.1 and 5.20.2 have a bug where regex comparisons on unicode strings can crash.
273							# It seems to damage the scalar $1, but copying it first fixes the problem.
274	6	50	33			83	my $kw_canonical= $] >= 5.020000 && $] < 5.020003? '$keywords->{lc(my $clone1= $1)}' : '$keywords->{lc $1}';
275							return (
276							# Pattern Name, Pattern, Token Type and Token Value
277	6					629	[ 'Whitespace', qr/(\s+)/, '"" => ""' ], # empty string causes next_token to loop
278							[ 'Decimal', qr/([0-9]*\.?[0-9]+(?:[eE][+-]?[0-9]+)?)\b/, 'Number => $1+0' ],
279							[ 'Hexadecimal', qr/0x([0-9A-Fa-f]+)/, 'Number => hex($1)' ],
280							[ 'Keywords', qr/($kw_regex)/, $kw_canonical.' => $1', { keywords => $keywords } ],
281							[ 'Identifiers', qr/([A-Za-z_][A-Za-z0-9_.]*)\b/, 'Identifier => $1' ],
282							# Single or double quoted string, using Pascal-style repeated quotes for escaping
283							[ 'StringLiteral', qr/(?:"((?:[^"]|"")*)"|'((?:[^']|'')*)')/, q%
284							do{
285							my $str= defined $1? $1 : $2;
286							$str =~ s/""/"/g if defined $1;
287							$str =~ s/''/'/g if defined $2;
288							(String => $str)
289							}
290							%],
291							);
292							}
293
294							sub _build_scan_token_method_body {
295	6			6		21	my ($self, $rules)= @_;
296	6					153	return join('', map
297							' return ' . $_->[2] . ' if $self->{input} =~ /\G' . $_->[1] . "/gc;\n",
298							@$rules
299							).' return;' # return empty list if no rule matched
300							}
301
302							sub _build_scan_token_method {
303	6			6		15	my ($pkg, $method_name)= @_;
304	6	50				39	$pkg= ref $pkg if ref $pkg;
305	6	50				25	$method_name= 'scan_token' unless defined $method_name;
306	6					25	my @rules= $pkg->scanner_rules;
307							# collect variables which should be available to the code
308	6	100				32	my %vars= map { $_->[3]? %{ $_->[3] } : () } @rules;
	36					91
	6					24
309	6					57	my $code= join "\n",
310							(map 'my $'.$_.' = $vars{'.$_.'};', keys %vars),
311							"sub ${pkg}::$method_name {",
312							' my $self= shift;',
313							$pkg->_build_scan_token_method_body(\@rules),
314							"}\n";
315							# closure needed for 5.8 and 5.10 which complain about using a lexical
316							# in a sub declared at package scope.
317	7			7		20860	no warnings 'redefine','closure';
	7					19
	7					6114
318	6	50		2222	1	3318	eval "$code; 1" or die $@ . " for generated scanner code:\n".$code;
	2222	100				5184
	2222	100				7152
	2012	100				6823
	1690	100				3757
	1688	100				7164
	791	100				3108
	366	100				1214
	87	100				332
	87	100				292
	87					271
	87					358
	279					739
319	6					81	return $pkg->can('scan_token');
320							}
321
322	2			2	1	10	sub scan_token { my $m= $_[0]->_build_scan_token_method; goto $m; };
	2					52
323
324
325	361			361		956	sub Language::FormulaEngine::Parser::Node::Call::function_name { $_[0][0] }
326	379			379		1400	sub Language::FormulaEngine::Parser::Node::Call::parameters { $_[0][1] }
327							sub Language::FormulaEngine::Parser::Node::Call::evaluate {
328	164			164		1237	my ($self, $namespace)= @_;
329	164					535	$namespace->evaluate_call($self);
330							}
331							sub Language::FormulaEngine::Parser::Node::Call::deparse {
332	12			12		20	my ($node, $parser)= @_;
333							return $node->function_name . (
334	12					23	!@{$node->parameters}? '()'
335	12	100				28	: '( ' .join(', ', map $parser->deparse($_), @{$node->parameters}). ' )'
	11					22
336							)
337							}
338
339							sub new_call {
340	362			362	1	1129	my ($self, $fn, $params)= @_;
341	362					1105	$self->functions->{$fn}++; # record dependency on this function
342	362					1501	bless [ $fn, $params ], 'Language::FormulaEngine::Parser::Node::Call';
343							}
344
345
346	90			90		158	sub Language::FormulaEngine::Parser::Node::Symbol::symbol_name { ${$_[0]} }
	90					288
347							sub Language::FormulaEngine::Parser::Node::Symbol::evaluate {
348	62			62		122	my ($self, $namespace)= @_;
349	62					170	$namespace->get_value($$self);
350							}
351							sub Language::FormulaEngine::Parser::Node::Symbol::deparse {
352	14			14		39	shift->symbol_name;
353							}
354
355							sub new_symbol {
356	153			153	1	319	my ($self, $name)= @_;
357	153					502	$self->symbols->{$name}++; # record dependency on this variable
358	153					474	bless \$name, 'Language::FormulaEngine::Parser::Node::Symbol';
359							}
360
361
362	62			62		124	sub Language::FormulaEngine::Parser::Node::String::string_value { ${$_[0]} }
	62					235
363	51			51		91	sub Language::FormulaEngine::Parser::Node::String::evaluate { ${$_[0]} }
	51					196
364							sub _str_escape {
365	6			6		11	my $str= shift;
366	6					13	$str =~ s/'/''/g;
367	6					23	"'$str'";
368							}
369							sub Language::FormulaEngine::Parser::Node::String::deparse {
370	6			6		14	_str_escape(shift->string_value);
371							}
372
373							sub new_string {
374	115			115	1	320	my ($self, $text)= @_;
375	115					382	bless \$text, 'Language::FormulaEngine::Parser::Node::String'
376							}
377
378
379	204			204		382	sub Language::FormulaEngine::Parser::Node::Number::number_value { ${$_[0]} }
	204					1224
380	140			140		230	sub Language::FormulaEngine::Parser::Node::Number::evaluate { ${$_[0]} }
	140					551
381	7			7		14	sub Language::FormulaEngine::Parser::Node::Number::deparse { shift->number_value }
382
383							sub new_number {
384	352			352	1	699	my $value= $_[1]+0;
385	352					1061	bless \$value, 'Language::FormulaEngine::Parser::Node::Number'
386							}
387
388
389							sub get_negative {
390	40			40	1	79	my ($self, $node)= @_;
391	40	100				209	return $self->new_number(-$node->number_value) if $node->can('number_value');
392	5	50	66			25	return $node->parameters->[0] if $node->can('function_name') and $node->function_name eq 'negative';
393	5					17	return $self->new_call('negative', [$node]);
394							}
395
396							1;
397
398							__END__
399
400							=pod
401
402							=encoding UTF-8
403
404							=head1 NAME
405
406							Language::FormulaEngine::Parser - Create parse tree from an input string
407
408							=head1 VERSION
409
410							version 0.06
411
412							=head1 SYNOPSIS
413
414							my $parse_tree= Language::FormulaEngine::Parser->new->parse($string);
415
416							=head1 DESCRIPTION
417
418							This class scans tokens from an input string and builds a parse tree. In compiler terminology,
419							it is both a Scanner and Parser. It performs a top-down recursive descent parse, because this
420							is easy and gives good error messages. It only parses strings, but leaves room for subclasses
421							to implement streaming. By default, the parser simply applies a Grammar to the input, without
422							checking whether the functions or variables exist, but can be subclassed to do more detailed
423							analysis during the parse.
424
425							The generated parse tree is made up of Function nodes (each infix operator is converted to a
426							named function) and each Function node may contain Symbols, Strings, Numbers, and other
427							Function nodes. The parse tree can be passed to the Evaluator for instant execution, or passed
428							to the Compiler to generate an optimized perl coderef. The parse tree is lightweight, and does
429							not include token/context information; this could also be added by a subclass.
430
431							=head1 PUBLIC API
432
433							=head2 parse
434
435							Parse a new input text, updating all derived attributes with the result of the operation.
436							It returns the value of L</parse_tree> (which is undef if the parse failed).
437							On failure, the exception is stored in L</error> and other attributes like L</token_pos> may
438							contain useful diagnostic information.
439
440							=head2 parse_tree
441
442							This holds the generated parse tree, or C<undef> if the parse failed. See L</"Parse Nodes">.
443
444							=head2 error
445
446							This is C<undef> if the parse succeeded, else an error message describing the syntax that ended
447							the parse.
448
449							=head2 functions
450
451							A set (hashref) of all function names encountered during the parse.
452
453							=head2 symbols
454
455							A set (hashref) of all non-function symbols encountered. (variables, constants, etc.)
456
457							=head2 reset
458
459							Clear the results of the previous parse, to re-use the object. Returns C<$self> for chaining.
460
461							=head2 deparse
462
463							my $formula_text= $parser->deparse($tree);
464
465							Return a canonical formula text for the parse tree, or a parse tree that you supply.
466
467							=head1 EXTENSIBLE API
468
469							These methods and attributes are documented for purposes of subclassing the parser.
470
471							=head2 input
472
473							The input string being scanned.
474							Code within the parser should access this as C<< $self->{input} >> for efficiency.
475
476							=head2 input_pos
477
478							Shortcut for C<< pos($self->{input}) >>.
479
480							=head2 token_type
481
482							Type of current token scanned from C<input>.
483							Code within the parser should access this as C<< $self->{token_type} >> for efficiency.
484
485							=head2 token_value
486
487							Value of current token scanned from C<input>, with escape sequences and etc resolved to a
488							sensible perl value.
489							Code within the parser should access this as C<< $self->{token_value} >> for efficiency.
490
491							=head2 token_pos
492
493							An offset within C<input> where this token started.
494							Code within the parser should access this as C<< $self->{token_pos} >> for efficiency.
495
496							=head2 next_token
497
498							Advance to the next token, replacing the values of C<token_> variables and updating
499							C<input_pos>. Returns the token_type, of which all are true except EOF which has a
500							type of C<0>, so this also means the function returns true if it parsed a token and
501							false if it reached EOF. It dies if no token could be parsed.
502							If you call next_token again after the eof token, it throws an exception.
503
504							This method is a wrapper around L</scan_token>. Override that method to add new token types.
505
506							=head2 scan_token
507
508							Pattern-match the next token, and either return C<< $type => $value >> or an empty list if
509							the syntax is invalid. This is intended to be overridden by subclasses.
510
511							=head2 consume_token
512
513							return $self->consume_token if $self->{token_type} eq $desired_type;
514
515							This is a shorthand for returning the current C<token_value> while also calling C<next_token>.
516
517							=head2 token_context
518
519							my $text= $self->token_context(%options);
520
521							Default behavior generates a string like:
522
523							"'blah blah' on line 15, char 12"
524
525							Passing C<< token_context(multiline => 1) >> generates a string like
526
527							"Expected something else at line 15, char 16\n" .
528							"blah blah blah token blah blah\n" .
529							" ^^^^^\n"
530
531							Multiline additionally takes arguments as described in
532							L<Language::FormulaEngine::Parser::ContextUtil/format_context_multiline>.
533
534							=head1 GRAMMAR
535
536							=head2 Parse Rules
537
538							The default grammar implements the following rules:
539
540							expr ::= or_expr
541							or_expr ::= and_expr ( 'or' and_expr )*
542							and_expr ::= not_expr ( 'and' not_expr )*
543							not_expr ::= ( 'not' | '!' ) cmp_expr | cmp_expr
544							cmp_expr ::= sum_expr ( ( '=' | '==' | '<>' | '\u2260' | '<' | '<=' | '>' | '>=' ) sum_expr )*
545							sum_expr ::= prod_expr ( ('+' | '-') prod_expr )*
546							prod_expr ::= ( unit_expr ('*' | '/') )* unit_expr
547							unit_expr ::= '-' unit_expr | Identifier '(' list ')' | '(' (expr|list) ')' | Identifier | Number | String
548							list ::= expr ( ',' expr )* ','?
549
550							C<Identifier>, C<Number>, C<String>, and all the punctuation symbols are tokens.
551
552							The parser uses a Recursive Descent algorithm implemented as the following method calls.
553							Each method consumes tokens from C<< $self >> and returns a parse node (see L</"Parse Nodes">):
554
555							=over
556
557							=item parse_expr
558
559							=item parse_or_expr
560
561							=item parse_and_expr
562
563							=item parse_not_expr
564
565							=item parse_cmp_expr
566
567							=item parse_sum_expr
568
569							=item parse_prod_expr
570
571							=item parse_unit_expr
572
573							=item parse_list
574
575							=back
576
577							=head2 Token Types
578
579							=over
580
581							=item C<'Number'>
582
583							All the common decimal representations of integers and floating point numbers
584							which perl can parse. Optional decimals and decimal point followed by decimals
585							and optional exponent, ending at either the end of the input or a non-alphanumeric.
586
587							=item C<'String'>
588
589							A single-quoted or double-quoted string, treating a double occurrence of the quote
590							character to mean a literal quote character. ("Pascal style")
591
592							'apostrophes aren''t hard'
593
594							There are no escape sequences though, so to get control characters or awkward unicode
595							into a string you need something like:
596
597							concat("smile ",char(0x263A))
598
599							which depends on those functions being available in the namespace.
600
601							=item Keywords...
602
603							Keywords include the "word" tokens like 'OR', but also every text literal seen in a parse rule
604							such as operators and punctuation.
605							The C<token_type> of the keyword is the canonical version of the keyword, and the C<token_value>
606							is the actual text that was captured. The pattern matches the longest keyword possible.
607
608							=item C<'Identifier'>
609
610							Any alpha (or underscore) followed by any run of alphanumerics,
611							(including underscore and period).
612
613							=back
614
615							=head2 Customizing the Token Scanner
616
617							The tokens are parsed using a series of regex tests. The regexes and the code that handles a
618							match of that regex are found in package attribute L</scanner_rules>. These regexes and code
619							fragments get lazily compiled into a package method on the first use (per package).
620							Meanwhile, several of those regex are built from other package attributes.
621
622							=over
623
624							=item scanner_rules
625
626							This package method returns a list (not arrayref) of ordered elements of the form
627							C<< [ $name, $regex, $code_fragment, \%vars ] >>. You can subclass this method to inspect
628							the rules (probably based on C<$name>) and replace the regexes, or alter the handler code,
629							or add/remove your own rules. The regexes are attempted in the order they appear in this
630							list. You do not need to use "\G" or "/gc" on these regexes because those are added
631							automatically during compilation.
632
633							=item keyword_map
634
635							This package method returns a hashref of all known keywords, mapped to their canonical form.
636							So for instance, a key of C<< '<>' >> with a value of C<< '!=' >>. These tokens automatically
637							become the scanner rule named C<Keywords>. In turn, the contents of this hashref include
638							the L</cmp_operators>, L</math_operators>, L</logic_operators>, and L</list_operators> which
639							can be overridden separately.
640
641							This method is called once during the compilation of L</scan_token>, and the result is then
642							made into a constant and referenced by the compiled method, so dynamic changes to the output
643							of this method will be ignored.
644
645							=item cmp_operators
646
647							Package method that returns a list of comparison operators, like '<', '>=', etc.
648
649							=item math_operators
650
651							Package method that returns a list of math operators, like '*', '+', etc.
652
653							=item logic_operators
654
655							Package method that returns a list of keywords like 'and', 'or', etc.
656
657							=item list_operators
658
659							Package method that returns a list of '(', ')', ','
660
661							=back
662
663							=head2 Parse Nodes
664
665							The parse tree takes a minimalist approach to node classification. In this default
666							implementation, number values, string values, and symbolic references have just a simple
667							wrapper around the value, and function calls are just a pair of function name and list of
668							arguments. All language operators are represented as function calls.
669
670							A blessed node only needs to support one method: C<< ->evaluate($namespace) >>.
671
672							The class name of the blessed nodes should be ignored. A function is anything which
673							C<< can("function_name") >>, a string is anything which C<< can("string_value") >>, a number is
674							anything which C<< can("number_value") >> and a symbolic reference is anything which
675							C<< can("symbol_name") >>.
676
677							Subclasses of Parser should implement new node types as needed. You probably also need to
678							update L</deparse>.
679
680							The parser rules (C<parse_X_expr> methods) create nodes by the following methods on the Parser
681							class, so that you can easily subclass C<Parser> and override which class of node is getting
682							created.
683
684							=over
685
686							=item new_call
687
688							$node= $parser->new_call( $function_name, $parameters );
689
690							Generate a node for a function call. The returned node has attributes C<function_name>
691							and C<parameters>
692
693							=item new_symbol
694
695							$node= $parser->new_symbol($symbol_name);
696
697							A reference to a symbolic value (i.e. variable or constant).
698							It has one attribute C<symbol_name>.
699
700							=item new_string
701
702							$node= $parser->new_string($string_value);
703
704							A string literal. It has an attribute C<string_value> holding the raw value.
705
706							=item new_number
707
708							$node= $parser->new_number($value);
709
710							A numeric constant. It has an attribute C<number_value> holding the raw value.
711
712							=item get_negative
713
714							$negative_node= $parser->get_negative( $node );
715
716							Utility method to get the "opposite of" a parse node. By default, this wraps it with the
717							function C<'negative'>, unless it already was that function then it unwraps the parameter.
718							It performs simple negation on numbers.
719
720							=back
721
722							=head1 AUTHOR
723
724							Michael Conrad <mconrad@intellitree.com>
725
726							=head1 COPYRIGHT AND LICENSE
727
728							This software is copyright (c) 2021 by Michael Conrad, IntelliTree Solutions llc.
729
730							This is free software; you can redistribute it and/or modify it under
731							the same terms as the Perl 5 programming language system itself.
732
733							=cut