File Coverage

blib/lib/Language/FormulaEngine/Parser.pm
Criterion Covered Total %
statement 246 253 97.2
branch 77 88 87.5
condition 27 35 77.1
subroutine 66 70 94.2
pod 33 33 100.0
total 449 479 93.7


line stmt bran cond sub pod time code
1             package Language::FormulaEngine::Parser;
2 9     9   464892 use Moo;
  9         34224  
  9         72  
3 9     9   6605 use Carp;
  9         33  
  9         588  
4 9     9   994 use Try::Tiny;
  9         2659  
  9         566  
5 9     9   75 use List::Util qw( min max );
  9         23  
  9         741  
6             use Language::FormulaEngine::Parser::ContextUtil
7 9     9   4191 qw( calc_text_coordinates format_context_string format_context_multiline );
  9         112  
  9         662  
8 9     9   2039 use namespace::clean;
  9         39086  
  9         61  
9              
10             # ABSTRACT: Create parse tree from an input string
11             our $VERSION = '0.07'; # VERSION
12              
13              
14             has parse_tree => ( is => 'rw' );
15             has error => ( is => 'rw' );
16             has functions => ( is => 'rw' );
17             has symbols => ( is => 'rw' );
18              
19             sub parse {
20 283     283 1 57623 my ($self, $input)= @_;
21 283         807 $self->reset;
22 283         639 $self->{input}= $input;
23 283         839 pos( $self->{input} )= 0;
24             try {
25 283     283   11793 $self->next_token;
26 282         689 my $tree= $self->parse_expr;
27             # It is an error if there was un-processed input.
28 281 100       603 $self->token_type eq '0'
29             or die sprintf('Unexpected %s "%s" near %s',
30             $self->token_type, $self->token_value, $self->token_context);
31 280         973 $self->parse_tree($tree);
32             } catch {
33 3     3   55 chomp;
34 3         16 $self->error($_);
35 283         1803 };
36 283         5218 return $self->parse_tree;
37             }
38              
39             sub reset {
40 283     283 1 493 my $self= shift;
41 283         1319 $self->parse_tree(undef);
42 283         588 $self->error(undef);
43 283         726 $self->functions({});
44 283         659 $self->symbols({});
45 283         490 delete @{$self}{'input','token_type','token_value','token_pos'};
  283         1020  
46 283         506 $self;
47             }
48              
49              
50             sub deparse {
51 63     63 1 3734 my ($self, $node)= @_;
52 63 100       205 $node= $self->parse_tree unless @_ > 1;
53 63         168 $node->deparse($self);
54             }
55              
56              
57 0     0 1 0 sub input { shift->{input} }
58 0     0 1 0 sub input_pos { pos( shift->{input} ) }
59 338     338 1 1164 sub token_type { shift->{token_type} }
60 47     47 1 221 sub token_value { shift->{token_value} }
61 46     46 1 115 sub token_pos { shift->{token_pos} }
62              
63              
64             sub next_token {
65 2078     2078 1 48526 my $self= shift;
66            
67             # If already reached end of input, throw an exception.
68             die "Can't call next_token after end of input"
69 2078 50 100     5827 if '0' eq ($self->{token_type}||'');
70            
71             # Detect the next token
72 2078         4219 my ($type, $val, $pos0, $pos1)= ('','');
73 2078         3921 while ($type eq '') {
74 2312   100     4901 $pos0= pos($self->{input}) || 0;
75 2312         54467 ($type, $val)= $self->scan_token;
76 2312   100     11930 $pos1= pos($self->{input}) || 0;
77             # Check for end of buffer, even if it matched.
78 2312 100       4877 if ($pos1 >= length $self->{input}) {
79             #pos($self->{input})= $pos0; # rewind to start of token before growing buffer
80             #if ($self->_grow_buffer) {
81             # $log->trace("grow buffer succeeded");
82             # $type= '';
83             # next;
84             #}
85             #pos($self->{input})= $pos1; # restore actual position\
86             # If we didn't get a token or are ignoring this final token, then return the EOF token
87 577 100 100     1769 if (!defined $type || $type eq '') {
88 291         498 $type= 0;
89 291         448 $val= '';
90 291         451 $pos0= $pos1;
91 291         521 last;
92             }
93             }
94 2021 100       3657 defined $type
95             or die "Unknown syntax at ".$self->token_context."\n";
96 2020 50       5107 $pos1 > $pos0
97             or croak "Tokenizer consumed zero characters";
98             }
99 2077         3349 @{$self}{'token_type','token_value','token_pos'}= ($type,$val,$pos0);
  2077         4860  
100 2077         3734 return $type, $val;
101             }
102              
103              
104             sub consume_token {
105 971     971 1 18587 my $self= shift;
106             croak "Can't consume EOF"
107 971 100       2992 if $self->{token_type} eq '0';
108 961         1539 my $val= $self->{token_value};
109 961         2227 $self->next_token;
110 961         2342 return $val;
111             }
112              
113             sub token_context {
114 2     2 1 6 my ($self, %args)= @_;
115             return format_context_multiline($self->{input}, $self->{token_pos}||0, pos($self->{input})||0, \%args)
116 2 50 0     6 if delete $args{multiline};
      0        
117 2   100     25 return format_context_string($self->{input}, $self->{token_pos}||0, pos($self->{input})||0);
      100        
118             }
119              
120              
121 739     739 1 1492 sub parse_expr { shift->parse_or_expr; }
122              
123             sub parse_or_expr {
124 739     739 1 1172 my $self= shift;
125 739         1408 my $first= $self->parse_and_expr;
126 738 100       2031 return $first unless $self->{token_type} eq 'or';
127 2         6 my @or_expr= $first;
128 2         17 while ($self->{token_type} eq 'or') {
129 2         8 $self->next_token;
130 2         5 push @or_expr, $self->parse_and_expr;
131             }
132 2         7 return $self->new_call('or', \@or_expr);
133             }
134              
135             sub parse_and_expr {
136 741     741 1 1195 my $self= shift;
137 741         1390 my $first= $self->parse_not_expr;
138 740 100       1757 return $first unless $self->{token_type} eq 'and';
139 8         21 my @and_expr= $first;
140 8         38 while ($self->{token_type} eq 'and') {
141 8         28 $self->next_token;
142 8         26 push @and_expr, $self->parse_not_expr;
143             }
144 8         29 return $self->new_call('and', \@and_expr);
145             }
146              
147             sub parse_not_expr {
148 749     749 1 1125 my $self= shift;
149 749 100 66     2804 if ($self->{token_type} eq 'not' or $self->{token_type} eq '!') {
150 5         16 $self->next_token;
151 5         33 return $self->new_call('not', [ $self->parse_cmp_expr ]);
152             }
153 744         1523 return $self->parse_cmp_expr;
154             }
155              
156             my %_cmp_ops= map { $_ => 1 } qw( > < >= <= != == );
157             sub parse_cmp_expr {
158 749     749 1 1111 my $self= shift;
159 749         1422 my $first= $self->parse_sum_expr;
160 748 100       2049 return $first unless $_cmp_ops{$self->{token_type}};
161 24         67 my @expr= $first;
162 24         72 while ($_cmp_ops{$self->{token_type}}) {
163 32         81 push @expr, $self->new_string($self->{token_type});
164 32         87 $self->next_token;
165 32         81 push @expr, $self->parse_sum_expr;
166             }
167 24         74 return $self->new_call('compare', \@expr);
168             }
169              
170             sub parse_sum_expr {
171 781     781 1 1115 my $self= shift;
172 781         1473 my $first= $self->parse_prod_expr;
173 780 100 100     2800 return $first unless $self->{token_type} eq '+' or $self->{token_type} eq '-';
174 26         68 my @sum_expr= $first;
175 26   100     93 while ($self->{token_type} eq '+' or $self->{token_type} eq '-') {
176 35         89 my $negate= $self->consume_token eq '-';
177 35         104 my $operand= $self->parse_prod_expr;
178 35 100       181 push @sum_expr, $negate? $self->get_negative($operand) : $operand;
179             }
180 26         83 return $self->new_call('sum', \@sum_expr);
181             }
182              
183             sub parse_prod_expr {
184 816     816 1 1271 my $self= shift;
185 816         1690 my $value= $self->parse_unit_expr;
186 815   100     3765 while ($self->{token_type} eq '*' or $self->{token_type} eq '/') {
187 39         110 my $op= $self->consume_token;
188 39         110 my $right= $self->parse_unit_expr;
189 39 100       194 $value= $self->new_call( $op eq '*'? 'mul' : 'div', [ $value, $right ] );
190             }
191 815         1426 return $value;
192             }
193              
194             sub parse_unit_expr {
195 885     885 1 1308 my $self= shift;
196 885         1277 my $negate= 0;
197 885         1270 my $expr;
198              
199 885 100       1830 if ($self->{token_type} eq '-') {
200 30         75 $self->next_token;
201 30         72 return $self->get_negative($self->parse_unit_expr);
202             }
203              
204 855 100       1693 if ($self->{token_type} eq '(') {
205 13         40 $self->next_token;
206 13         43 my $args= $self->parse_list;
207             die "Expected ')' near ".$self->token_context."\n"
208 13 50       40 if $self->{token_type} ne ')';
209 13         36 $self->next_token;
210 13 100       58 return @$args > 1? $self->new_call('list', $args) : $args->[0];
211             }
212            
213 842 100       1673 if ($self->{token_type} eq 'Number') {
214 323         673 return $self->new_number($self->consume_token);
215             }
216            
217 519 100       1053 if ($self->{token_type} eq 'String') {
218 84         189 return $self->new_string($self->consume_token);
219             }
220            
221 435 100       956 if ($self->{token_type} eq 'Identifier') {
222 434         811 my $id= $self->consume_token;
223 434 100       1050 if ($self->{token_type} eq '(') {
224 266         680 $self->next_token;
225 266 100       917 my $args= $self->{token_type} eq ')'? [] : $self->parse_list;
226             die "Expected ')' near ".$self->token_context."\n"
227 266 50       653 if $self->{token_type} ne ')';
228 266         672 $self->next_token;
229 266         616 return $self->new_call($id, $args);
230             }
231             else {
232 168         452 return $self->new_symbol($id);
233             }
234             }
235            
236 1 50       10 if ($self->{token_type} eq '0') {
237 1         9 die "Expected expression, but reached end of input\n";
238             }
239            
240 0         0 die "Unexpected token $self->{token_type} '$self->{token_value}' near ".$self->token_context."\n";
241             }
242              
243             sub parse_list {
244 268     268 1 482 my $self= shift;
245 268         545 my @args= $self->parse_expr;
246 268         676 while ($self->{token_type} eq ',') {
247 189         454 $self->next_token;
248 189         430 push @args, $self->parse_expr;
249             }
250 268         641 return \@args;
251             }
252              
253              
254 8     8 1 54 sub cmp_operators { qw( = == != <> > >= < <= ), "\x{2260}", "\x{2264}", "\x{2265}" }
255 8     8 1 57 sub math_operators { qw( + - * / ) }
256 8     8 1 32 sub logic_operators { qw( and or not ! ) }
257 8     8 1 25 sub list_operators { ',', '(', ')' }
258             sub keyword_map {
259             return {
260 8     8 1 30 (map { $_ => $_ } cmp_operators, math_operators, logic_operators, list_operators),
  176         505  
261             '=' => '==', '<>' => '!=', "\x{2260}" => '!=',
262             "\x{2264}" => '<=', "\x{2265}" => '>='
263             }
264             }
265             sub scanner_rules {
266 8     8 1 19 my $self= shift;
267 8         40 my $keywords= $self->keyword_map;
268             my $kw_regex= join '|', map "\Q$_\E",
269 8         68 sort { length($b) <=> length($a) } # longest keywords get priority
  528         858  
270             keys %$keywords;
271            
272             # Perl 5.20.1 and 5.20.2 have a bug where regex comparisons on unicode strings can crash.
273             # It seems to damage the scalar $1, but copying it first fixes the problem.
274 8 50 33     92 my $kw_canonical= $] >= 5.020000 && $] < 5.020003? '$keywords->{lc(my $clone1= $1)}' : '$keywords->{lc $1}';
275             return (
276             # Pattern Name, Pattern, Token Type and Token Value
277 8         901 [ 'Whitespace', qr/(\s+)/, '"" => ""' ], # empty string causes next_token to loop
278             [ 'Decimal', qr/([0-9]*\.?[0-9]+(?:[eE][+-]?[0-9]+)?)\b/, 'Number => $1+0' ],
279             [ 'Hexadecimal', qr/0x([0-9A-Fa-f]+)/, 'Number => hex($1)' ],
280             [ 'Keywords', qr/($kw_regex)/, $kw_canonical.' => $1', { keywords => $keywords } ],
281             [ 'Identifiers', qr/([A-Za-z_][A-Za-z0-9_.]*)\b/, 'Identifier => $1' ],
282             # Single or double quoted string, using Pascal-style repeated quotes for escaping
283             [ 'StringLiteral', qr/(?:"((?:[^"]|"")*)"|'((?:[^']|'')*)')/, q%
284             do{
285             my $str= defined $1? $1 : $2;
286             $str =~ s/""/"/g if defined $1;
287             $str =~ s/''/'/g if defined $2;
288             (String => $str)
289             }
290             %],
291             );
292             }
293              
294             sub _build_scan_token_method_body {
295 8     8   28 my ($self, $rules)= @_;
296 8         242 return join('', map
297             ' return ' . $_->[2] . ' if $self->{input} =~ /\G' . $_->[1] . "/gc;\n",
298             @$rules
299             ).' return;' # return empty list of no rule matched
300             }
301              
302             sub _build_scan_token_method {
303 8     8   27 my ($pkg, $method_name)= @_;
304 8 50       34 $pkg= ref $pkg if ref $pkg;
305 8 50       45 $method_name= 'scan_token' unless defined $method_name;
306 8         45 my @rules= $pkg->scanner_rules;
307             # collect variables which should be available to the code
308 8 100       42 my %vars= map { $_->[3]? %{ $_->[3] } : () } @rules;
  48         132  
  8         33  
309 8         81 my $code= join "\n",
310             (map 'my $'.$_.' = $vars{'.$_.'};', keys %vars),
311             "sub ${pkg}::$method_name {",
312             ' my $self= shift;',
313             $pkg->_build_scan_token_method_body(\@rules),
314             "}\n";
315             # closure needed for 5.8 and 5.10 which complain about using a lexical
316             # in a sub declared at package scope.
317 9     9   27161 no warnings 'redefine','closure';
  9         29  
  9         10620  
318 8 50   2312 1 4272 eval "$code; 1" or die $@ . " for generated scanner code:\n".$code;
  2312 100       5593  
  2312 100       7271  
  2075 100       6913  
  1747 100       3965  
  1745 100       7434  
  821 100       3140  
  376 100       1309  
  87 100       310  
  87 100       250  
  87         248  
  87         328  
  289         732  
319 8         108 return $pkg->can('scan_token');
320             }
321              
322 2     2 1 12 sub scan_token { my $m= $_[0]->_build_scan_token_method; goto $m; };
  2         51  
323              
324              
325             sub Language::FormulaEngine::Parser::Node::Call::new {
326 13     13   30 my ($class, $name, $params)= @_;
327 13         51 bless [ $name, $params ], $class;
328             }
329 7     7   21 sub Language::FormulaEngine::Parser::Node::Call::is_constant { 0 }
330 414     414   1071 sub Language::FormulaEngine::Parser::Node::Call::function_name { $_[0][0] }
331 444     444   1576 sub Language::FormulaEngine::Parser::Node::Call::parameters { $_[0][1] }
332             sub Language::FormulaEngine::Parser::Node::Call::evaluate {
333 169     169   1245 my ($self, $namespace)= @_;
334 169         459 $namespace->evaluate_call($self);
335             }
336             sub Language::FormulaEngine::Parser::Node::Call::simplify {
337 15     15   49 my ($node, $namespace)= @_;
338 15         74 $namespace->simplify_call($node)
339             }
340             sub Language::FormulaEngine::Parser::Node::Call::deparse {
341 23     23   67 my ($node, $parser)= @_;
342             return $node->function_name . (
343 23         53 !@{$node->parameters}? '()'
344 23 100       46 : '( ' .join(', ', map $parser->deparse($_), @{$node->parameters}). ' )'
  21         40  
345             )
346             }
347              
348             sub new_call {
349 377     377 1 759 my ($self, $fn, $params)= @_;
350 377         1182 $self->functions->{$fn}++; # record dependency on this function
351 377         2288 bless [ $fn, $params ], 'Language::FormulaEngine::Parser::Node::Call';
352             }
353              
354              
355             sub Language::FormulaEngine::Parser::Node::symbol::new {
356 0     0   0 my ($class, $name)= @_;
357 0         0 bless \$name, $class;
358             }
359              
360 7     7   22 sub Language::FormulaEngine::Parser::Node::Symbol::is_constant { 0 }
361 121     121   195 sub Language::FormulaEngine::Parser::Node::Symbol::symbol_name { ${$_[0]} }
  121         464  
362             sub Language::FormulaEngine::Parser::Node::Symbol::evaluate {
363 66     66   126 my ($self, $namespace)= @_;
364 66         225 $namespace->get_value($$self);
365             }
366             sub Language::FormulaEngine::Parser::Node::Symbol::simplify {
367 15     15   41 my ($self, $namespace)= @_;
368 15         50 return $namespace->simplify_symref($self);
369             }
370             sub Language::FormulaEngine::Parser::Node::Symbol::deparse {
371 24     24   75 shift->symbol_name;
372             }
373              
374             sub new_symbol {
375 168     168 1 363 my ($self, $name)= @_;
376 168         517 $self->symbols->{$name}++; # record dependency on this variable
377 168         544 bless \$name, 'Language::FormulaEngine::Parser::Node::Symbol';
378             }
379              
380              
381             sub Language::FormulaEngine::Parser::Node::String::new {
382 0     0   0 my ($class, $value)= @_;
383 0         0 bless \$value, $class;
384             }
385              
386 1     1   4 sub Language::FormulaEngine::Parser::Node::String::is_constant { 1 }
387 62     62   114 sub Language::FormulaEngine::Parser::Node::String::string_value { ${$_[0]} }
  62         199  
388 52     52   95 sub Language::FormulaEngine::Parser::Node::String::evaluate { ${$_[0]} }
  52         192  
389 1     1   2 sub Language::FormulaEngine::Parser::Node::String::simplify { $_[0] }
390             sub _str_escape {
391 6     6   10 my $str= shift;
392 6         15 $str =~ s/'/''/g;
393 6         68 "'$str'";
394             }
395             sub Language::FormulaEngine::Parser::Node::String::deparse {
396 6     6   28 _str_escape(shift->string_value);
397             }
398              
399             sub new_string {
400 116     116 1 282 my ($self, $text)= @_;
401 116         352 bless \$text, 'Language::FormulaEngine::Parser::Node::String'
402             }
403              
404              
405             sub Language::FormulaEngine::Parser::Node::Number::new {
406 18     18   47 my ($class, $value)= @_;
407 18         32 $value= 0+$value;
408 18         80 bless \$value, $class;
409             }
410            
411 25     25   93 sub Language::FormulaEngine::Parser::Node::Number::is_constant { 1 }
412 219     219   304 sub Language::FormulaEngine::Parser::Node::Number::number_value { ${$_[0]} }
  219         1245  
413 159     159   260 sub Language::FormulaEngine::Parser::Node::Number::evaluate { ${$_[0]} }
  159         532  
414 6     6   24 sub Language::FormulaEngine::Parser::Node::Number::simplify { $_[0] }
415 18     18   48 sub Language::FormulaEngine::Parser::Node::Number::deparse { shift->number_value }
416              
417             sub new_number {
418 359     359 1 644 my $value= $_[1]+0;
419 359         1142 bless \$value, 'Language::FormulaEngine::Parser::Node::Number'
420             }
421              
422              
423             sub get_negative {
424 41     41 1 85 my ($self, $node)= @_;
425 41 100       242 return $self->new_number(-$node->number_value) if $node->can('number_value');
426 5 50 66     30 return $node->parameters->[0] if $node->can('function_name') and $node->function_name eq 'negative';
427 5         21 return $self->new_call('negative', [$node]);
428             }
429              
430             1;
431              
432             __END__
433              
434             =pod
435              
436             =encoding UTF-8
437              
438             =head1 NAME
439              
440             Language::FormulaEngine::Parser - Create parse tree from an input string
441              
442             =head1 VERSION
443              
444             version 0.07
445              
446             =head1 SYNOPSIS
447              
448             my $parse_tree= Language::FormulaEngine::Parser->new->parse($string);
449              
450             =head1 DESCRIPTION
451              
452             This class scans tokens from an input string and builds a parse tree. In compiler terminology,
453             it is both a Scanner and Parser. It performs a top-down recursive descent parse, because this
454             is easy and gives good error messages. It only parses strings, but leaves room for subclasses
455             to implement streaming. By default, the parser simply applies a Grammar to the input, without
456             checking whether the functions or variables exist, but can be subclassed to do more detailed
457             analysis during the parse.
458              
459             The generated parse tree is made up of Function nodes (each infix operator is converted to a
460             named function) and each Function node may contain Symbols, Strings, Numbers, and other
461             Function nodes. The parse tree can be passed to the Evaluator for instant execution, or passed
462             to the Compiler to generate an optimized perl coderef. The parse tree is lightweight, and does
463             not include token/context information; this could also be added by a subclass.
464              
465             =head1 PUBLIC API
466              
467             =head2 parse
468              
469             Parse a new input text, updating all derived attributes with the result of the operation.
470             It returns the value of L</parse_tree> (which is undef if the parse failed).
471             On failure, the exception is stored in L</error> and other attributes like L</token_pos> may
472             contain useful diagnostic information.
473              
474             =head2 parse_tree
475              
476             This holds the generated parse tree, or C<undef> if the parse failed. See L</"Parse Nodes">.
477              
478             =head2 error
479              
480             This is C<undef> if the parse succeeded, else an error message describing the syntax that ended
481             the parse.
482              
483             =head2 functions
484              
485             A set (hashref) of all function names encountered during the parse.
486              
487             =head2 symbols
488              
489             A set (hashref) of all non-function symbols encountered. (variables, constnts, etc.)
490              
491             =head2 reset
492              
493             Clear the results of the previous parse, to re-use the object. Returns C<$self> for chaining.
494              
495             =head2 deparse
496              
497             my $formula_text= $parser->deparse($tree);
498              
499             Return a canonical formula text for the parse tree, or a parse tree that you supply.
500              
501             =head1 EXTENSIBLE API
502              
503             These methods and attributes are documented for purposes of subclassing the parser.
504              
505             =head2 input
506              
507             The input string being scanned.
508             Code within the parser should access this as C<< $self->{input} >> for efficiency.
509              
510             =head2 input_pos
511              
512             Shortcut for C<< pos($self->{input}) >>.
513              
514             =head2 token_type
515              
516             Type of current token scanned from C<input>.
517             Code within the parser should access this as C<< $self->{token_type} >> for efficiency.
518              
519             =head2 token_value
520              
521             Value of current token scanned from C<input>, with escape sequences and etc resolved to a
522             sensible perl value.
523             Code within the parser should access this as C<< $self->{token_value} >> for efficiency.
524              
525             =head2 token_pos
526              
527             An offset within C<input> where this token started.
528             Code within the parser should access this as C<< $self->{token_pos} >> for efficiency.
529              
530             =head2 next_token
531              
532             Advance to the next token, replacing the values of C<token_> variables and updating
533             C<input_pos>. Returns the token_type, of which all are true except EOF which has a
534             type of C<0>, so this also means the function returns true if it parsed a token and
535             false if it reached EOF. It dies if no token could be parsed.
536             If you call next_token again after the eof token, it throws an exception.
537              
538             This method is a wrapper around L</scan_token>. Override that method to add new token types.
539              
540             =head2 scan_token
541              
542             Pattern-match the next token, and either return C<< $type => $value >> or an empty list if
543             the syntax is invalid. This is intended to be overridden by subclasses.
544              
545             =head2 consume_token
546              
547             return $self->consume_token if $self->{token_type} eq $desired_type;
548              
549             This is a shorthand for returning the current C<token_value> while also calling C<next_token>.
550              
551             =head2 token_context
552              
553             my $text= $self->token_context(%options);
554              
555             Default behavior generates a string like:
556              
557             "'blah blah' on line 15, char 12"
558              
559             Passing C<< token_context(multiline => 1) >> generates a string like
560              
561             "Expected something else at line 15, char 16\n" .
562             "blah blah blah token blah blah\n" .
563             " ^^^^^\n"
564              
565             Multiline additionally takes arguments as described in
566             L<Language::FormulaEngine::Parser::ContextUtil/format_context_multiline>.
567              
568             =head1 GRAMMAR
569              
570             =head2 Parse Rules
571              
572             The default grammar implements the following rules:
573              
574             expr ::= or_expr
575             or_expr ::= and_expr ( 'or' and_expr )*
576             and_expr ::= not_expr ( 'and' not_expr )*
577             not_expr ::= ( 'not' | '!' ) cmp_expr | cmp_expr
578             cmp_expr ::= sum_expr ( ( '=' | '==' | '<>' | '\u2260' | '<' | '<=' | '>' | '>=' ) sum_expr )*
579             sum_expr ::= prod_expr ( ('+' | '-') prod_expr )*
580             prod_expr ::= ( unit_expr ('*' | '/') )* unit_expr
581             unit_expr ::= '-' unit_expr | Identifier '(' list ')' | '(' (expr|list) ')' | Identifier | Number | String
582             list ::= expr ( ',' expr )* ','?
583              
584             C<ident>, C<num>, C<str>, and all the punctuation symbols are tokens.
585              
586             The parser uses a Recursive Descent algorithm implemented as the following method calls.
587             Each method consumes tokens from C<< $self >> and return a L</"PARSE NODES">:
588              
589             =over
590              
591             =item parse_expr
592              
593             =item parse_or_expr
594              
595             =item parse_and_expr
596              
597             =item parse_not_expr
598              
599             =item parse_cmp_expr
600              
601             =item parse_sum_expr
602              
603             =item parse_prod_expr
604              
605             =item parse_unit_expr
606              
607             =item parse_list
608              
609             =back
610              
611             =head2 Token Types
612              
613             =over
614              
615             =item C<'Number'>
616              
617             All the common decimal representations of integers and floating point numbers
618             which perl can parse. Optional decimals and decimal point followed by decimals
619             and optional exponent, ending at either the end of the input or a non-alphanumeric.
620              
621             =item C<'String'>
622              
623             A single-quoted or double-quoted string, treating a double occurrence of the quote
624             character to mean a literal quote character. ("Pascal style")
625              
626             'apostrophes are''nt hard'
627              
628             There are no escape sequences though, so to get control characters or awkward unicode
629             into a string you need something like:
630              
631             concat("smile ",char(0x263A))
632              
633             which depends on those functions being available in the namespace.
634              
635             =item Keywords...
636              
637             Keywords include the "word" tokens like 'OR', but also every text literal seen in a parse rule
638             such as operators and punctuation.
639             The C<token_type> of the keyword is the canonical version of the keyword, and the C<token_value>
640             is the actual text that was captured. The pattern matches the longest keyword possible.
641              
642             =item C<'Identifier'>
643              
644             Any alpha (or underscore) followed by any run of alphanumerics,
645             (including underscore and period).
646              
647             =back
648              
649             =head2 Customizing the Token Scanner
650              
651             The tokens are parsed using a series of regex tests. The regexes and the code that handles a
652             match of that regex are found in package attribute L</scanner_rules>. These regexes and code
653             fragments get lazily compiled into a package method on the first use (per package).
654             Meanwhile, several of those regex are built from other package attributes.
655              
656             =over
657              
658             =item scanner_rules
659              
660             This package method returns a list (not arrayref) of ordered elements of the form
661             C<< [ $name, $regex, $code_fragment, \%vars ] >>. You can subclass this method to inspect
662             the rules (probably based on C<$name>) and replace the regexes, or alter the handler code,
663             or add/remove your own rules. The regexes are attempted in the order they appear in this
664             list. You do not need to use "\G" or "/gc" on these regexes because those are added
665             automatically during compilation.
666              
667             =item keyword_map
668              
669             This package method returns a hashref of all known keywords, mapped to their canonical form.
670             So for instance, a key of C<< '<>' >> with a value of C<< '!=' >>. These tokens automatically
671             become the scanner rule named C<Keywords>. In turn, the contents of this hashref include
672             the L</cmp_operators>, L</math_operators>, L</logic_operators>, and L</list_operators> which
673             can be overridden separately.
674              
675             This method is called once during the compilation of L</scan_token>, and the result is then
676             made into a constant and referenced by the compiled method, so dynamic changes to the output
677             of this method will be ignored.
678              
679             =item cmp_operators
680              
681             Package method that returns a list of comparison operators, like '<', '>=', etc.
682              
683             =item math_operators
684              
685             Package method that returns a list of math operators, like '*', '+', etc.
686              
687             =item logic_operators
688              
689             Package method that returns a list of keywords like 'and', 'or', etc.
690              
691             =item list_operators
692              
693             Package method that returns a list of '(', ')', ','
694              
695             =back
696              
697             =head2 Parse Nodes
698              
699             The parse tree takes a minimalist approach to node classification. In this default
700             implementation, number values, string values, and symbolic references have just a simple
701             wrapper around the value, and function calls are just a pair of function name and list of
702             arguments. All language operators are represented as function calls.
703              
704             A blessed node only needs to support one method: C<< ->evaluate($namespace) >>.
705              
706             The class name of the blessed nodes should be ignored. A function is anything which
707             C<< can("function_name") >>, a string is anything which C<< can("string_value") >>, a number is
708             anything which C<< can("number_value") >> and a symbolic reference is anything which
709             C<< can("symbolic_name") >>.
710              
711             Subclasses of Parser should implemnt new node types as needed. You probable also need to
712             update L</deparse>.
713              
714             The parser rules (C<parse_X_expr> methods) create nodes by the following methods on the Parser
715             class, so that you can easily subclass C<Parser> and override which class of node is getting
716             created.
717              
718             =over
719              
720             =item new_call
721              
722             $node= $parser->new_call( $function_name, $parameters );
723              
724             Generate a node for a function call. The returned node has attributes C<function_name>
725             and C<parameters>
726              
727             =item new_symbol
728              
729             $node= $parser->new_symbol($symbol_name);
730              
731             A reference to a symbolic value (i.e. variable or constant).
732             It has one attribute C<symbol_name>.
733              
734             =item new_string
735              
736             $node= $parser->new_string($string_value);
737              
738             A string literal. It has an attribute C<string_value> holding the raw value.
739              
740             =item new_number
741              
742             $plain_scalar= $parser->new_number($value);
743              
744             A numeric constant. It has an attribute C<number_value> holding the raw value.
745              
746             =item get_negative
747              
748             $negative_node= $parser->get_negative( $node );
749              
750             Utility method to get the "opposite of" a parse node. By default, this wraps it with the
751             function C<'negative'>, unless it already was that function then it unwraps the parameter.
752             It performs simple negation on numbers.
753              
754             =back
755              
756             =head1 AUTHOR
757              
758             Michael Conrad <mconrad@intellitree.com>
759              
760             =head1 COPYRIGHT AND LICENSE
761              
762             This software is copyright (c) 2023 by Michael Conrad, IntelliTree Solutions llc.
763              
764             This is free software; you can redistribute it and/or modify it under
765             the same terms as the Perl 5 programming language system itself.
766              
767             =cut