File Coverage

blib/lib/Language/FormulaEngine/Parser.pm
Criterion Covered Total %
statement 246 253 97.2
branch 77 88 87.5
condition 27 35 77.1
subroutine 66 70 94.2
pod 33 33 100.0
total 449 479 93.7


line stmt bran cond sub pod time code
1             package Language::FormulaEngine::Parser;
2 9     9   466036 use Moo;
  9         33873  
  9         78  
3 9     9   6842 use Carp;
  9         20  
  9         636  
4 9     9   1132 use Try::Tiny;
  9         2702  
  9         594  
5 9     9   72 use List::Util qw( min max );
  9         30  
  9         783  
6             use Language::FormulaEngine::Parser::ContextUtil
7 9     9   4362 qw( calc_text_coordinates format_context_string format_context_multiline );
  9         27  
  9         708  
8 9     9   1942 use namespace::clean;
  9         38570  
  9         70  
9              
10             # ABSTRACT: Create parse tree from an input string
11             our $VERSION = '0.08'; # VERSION
12              
13              
14             has parse_tree => ( is => 'rw' );
15             has error => ( is => 'rw' );
16             has functions => ( is => 'rw' );
17             has symbols => ( is => 'rw' );
18              
19             sub parse {
20 283     283 1 63747 my ($self, $input)= @_;
21 283         876 $self->reset;
22 283         573 $self->{input}= $input;
23 283         863 pos( $self->{input} )= 0;
24             try {
25 283     283   11925 $self->next_token;
26 282         643 my $tree= $self->parse_expr;
27             # It is an error if there was un-processed input.
28 281 100       652 $self->token_type eq '0'
29             or die sprintf('Unexpected %s "%s" near %s',
30             $self->token_type, $self->token_value, $self->token_context);
31 280         962 $self->parse_tree($tree);
32             } catch {
33 3     3   67 chomp;
34 3         17 $self->error($_);
35 283         1870 };
36 283         5134 return $self->parse_tree;
37             }
38              
39             sub reset {
40 283     283 1 547 my $self= shift;
41 283         1365 $self->parse_tree(undef);
42 283         648 $self->error(undef);
43 283         759 $self->functions({});
44 283         695 $self->symbols({});
45 283         471 delete @{$self}{'input','token_type','token_value','token_pos'};
  283         1099  
46 283         516 $self;
47             }
48              
49              
50             sub deparse {
51 63     63 1 3702 my ($self, $node)= @_;
52 63 100       150 $node= $self->parse_tree unless @_ > 1;
53 63         142 $node->deparse($self);
54             }
55              
56              
57 0     0 1 0 sub input { shift->{input} }
58 0     0 1 0 sub input_pos { pos( shift->{input} ) }
59 338     338 1 1206 sub token_type { shift->{token_type} }
60 47     47 1 228 sub token_value { shift->{token_value} }
61 46     46 1 114 sub token_pos { shift->{token_pos} }
62              
63              
64             sub next_token {
65 2078     2078 1 50258 my $self= shift;
66            
67             # If already reached end of input, throw an exception.
68             die "Can't call next_token after end of input"
69 2078 50 100     5960 if '0' eq ($self->{token_type}||'');
70            
71             # Detect the next token
72 2078         4057 my ($type, $val, $pos0, $pos1)= ('','');
73 2078         4004 while ($type eq '') {
74 2312   100     4815 $pos0= pos($self->{input}) || 0;
75 2312         55489 ($type, $val)= $self->scan_token;
76 2312   100     11833 $pos1= pos($self->{input}) || 0;
77             # Check for end of buffer, even if it matched.
78 2312 100       4765 if ($pos1 >= length $self->{input}) {
79             #pos($self->{input})= $pos0; # rewind to start of token before growing buffer
80             #if ($self->_grow_buffer) {
81             # $log->trace("grow buffer succeeded");
82             # $type= '';
83             # next;
84             #}
85             #pos($self->{input})= $pos1; # restore actual position\
86             # If we didn't get a token or are ignoring this final token, then return the EOF token
87 577 100 100     1774 if (!defined $type || $type eq '') {
88 291         461 $type= 0;
89 291         466 $val= '';
90 291         414 $pos0= $pos1;
91 291         504 last;
92             }
93             }
94 2021 100       3837 defined $type
95             or die "Unknown syntax at ".$self->token_context."\n";
96 2020 50       4938 $pos1 > $pos0
97             or croak "Tokenizer consumed zero characters";
98             }
99 2077         3503 @{$self}{'token_type','token_value','token_pos'}= ($type,$val,$pos0);
  2077         4973  
100 2077         3763 return $type, $val;
101             }
102              
103              
104             sub consume_token {
105 971     971 1 19225 my $self= shift;
106             croak "Can't consume EOF"
107 971 100       3020 if $self->{token_type} eq '0';
108 961         1544 my $val= $self->{token_value};
109 961         2122 $self->next_token;
110 961         2301 return $val;
111             }
112              
113             sub token_context {
114 2     2 1 5 my ($self, %args)= @_;
115             return format_context_multiline($self->{input}, $self->{token_pos}||0, pos($self->{input})||0, \%args)
116 2 50 0     7 if delete $args{multiline};
      0        
117 2   100     19 return format_context_string($self->{input}, $self->{token_pos}||0, pos($self->{input})||0);
      100        
118             }
119              
120              
121 739     739 1 1622 sub parse_expr { shift->parse_or_expr; }
122              
123             sub parse_or_expr {
124 739     739 1 1109 my $self= shift;
125 739         1438 my $first= $self->parse_and_expr;
126 738 100       2119 return $first unless $self->{token_type} eq 'or';
127 2         5 my @or_expr= $first;
128 2         18 while ($self->{token_type} eq 'or') {
129 2         11 $self->next_token;
130 2         9 push @or_expr, $self->parse_and_expr;
131             }
132 2         6 return $self->new_call('or', \@or_expr);
133             }
134              
135             sub parse_and_expr {
136 741     741 1 1166 my $self= shift;
137 741         1367 my $first= $self->parse_not_expr;
138 740 100       1873 return $first unless $self->{token_type} eq 'and';
139 8         26 my @and_expr= $first;
140 8         38 while ($self->{token_type} eq 'and') {
141 8         33 $self->next_token;
142 8         26 push @and_expr, $self->parse_not_expr;
143             }
144 8         46 return $self->new_call('and', \@and_expr);
145             }
146              
147             sub parse_not_expr {
148 749     749 1 1052 my $self= shift;
149 749 100 66     2801 if ($self->{token_type} eq 'not' or $self->{token_type} eq '!') {
150 5         17 $self->next_token;
151 5         12 return $self->new_call('not', [ $self->parse_cmp_expr ]);
152             }
153 744         1495 return $self->parse_cmp_expr;
154             }
155              
156             my %_cmp_ops= map { $_ => 1 } qw( > < >= <= != == );
157             sub parse_cmp_expr {
158 749     749 1 1212 my $self= shift;
159 749         1446 my $first= $self->parse_sum_expr;
160 748 100       2198 return $first unless $_cmp_ops{$self->{token_type}};
161 24         64 my @expr= $first;
162 24         87 while ($_cmp_ops{$self->{token_type}}) {
163 32         124 push @expr, $self->new_string($self->{token_type});
164 32         99 $self->next_token;
165 32         86 push @expr, $self->parse_sum_expr;
166             }
167 24         84 return $self->new_call('compare', \@expr);
168             }
169              
170             sub parse_sum_expr {
171 781     781 1 1192 my $self= shift;
172 781         1444 my $first= $self->parse_prod_expr;
173 780 100 100     2884 return $first unless $self->{token_type} eq '+' or $self->{token_type} eq '-';
174 26         65 my @sum_expr= $first;
175 26   100     89 while ($self->{token_type} eq '+' or $self->{token_type} eq '-') {
176 35         85 my $negate= $self->consume_token eq '-';
177 35         103 my $operand= $self->parse_prod_expr;
178 35 100       195 push @sum_expr, $negate? $self->get_negative($operand) : $operand;
179             }
180 26         74 return $self->new_call('sum', \@sum_expr);
181             }
182              
183             sub parse_prod_expr {
184 816     816 1 1221 my $self= shift;
185 816         1553 my $value= $self->parse_unit_expr;
186 815   100     3408 while ($self->{token_type} eq '*' or $self->{token_type} eq '/') {
187 39         93 my $op= $self->consume_token;
188 39         97 my $right= $self->parse_unit_expr;
189 39 100       211 $value= $self->new_call( $op eq '*'? 'mul' : 'div', [ $value, $right ] );
190             }
191 815         1436 return $value;
192             }
193              
194             sub parse_unit_expr {
195 885     885 1 1346 my $self= shift;
196 885         1237 my $negate= 0;
197 885         1211 my $expr;
198              
199 885 100       1820 if ($self->{token_type} eq '-') {
200 30         71 $self->next_token;
201 30         102 return $self->get_negative($self->parse_unit_expr);
202             }
203              
204 855 100       1659 if ($self->{token_type} eq '(') {
205 13         42 $self->next_token;
206 13         54 my $args= $self->parse_list;
207             die "Expected ')' near ".$self->token_context."\n"
208 13 50       47 if $self->{token_type} ne ')';
209 13         36 $self->next_token;
210 13 100       63 return @$args > 1? $self->new_call('list', $args) : $args->[0];
211             }
212            
213 842 100       1654 if ($self->{token_type} eq 'Number') {
214 323         611 return $self->new_number($self->consume_token);
215             }
216            
217 519 100       1038 if ($self->{token_type} eq 'String') {
218 84         184 return $self->new_string($self->consume_token);
219             }
220            
221 435 100       921 if ($self->{token_type} eq 'Identifier') {
222 434         753 my $id= $self->consume_token;
223 434 100       1074 if ($self->{token_type} eq '(') {
224 266         730 $self->next_token;
225 266 100       1368 my $args= $self->{token_type} eq ')'? [] : $self->parse_list;
226             die "Expected ')' near ".$self->token_context."\n"
227 266 50       695 if $self->{token_type} ne ')';
228 266         655 $self->next_token;
229 266         684 return $self->new_call($id, $args);
230             }
231             else {
232 168         452 return $self->new_symbol($id);
233             }
234             }
235            
236 1 50       12 if ($self->{token_type} eq '0') {
237 1         7 die "Expected expression, but reached end of input\n";
238             }
239            
240 0         0 die "Unexpected token $self->{token_type} '$self->{token_value}' near ".$self->token_context."\n";
241             }
242              
243             sub parse_list {
244 268     268 1 483 my $self= shift;
245 268         549 my @args= $self->parse_expr;
246 268         664 while ($self->{token_type} eq ',') {
247 189         493 $self->next_token;
248 189         467 push @args, $self->parse_expr;
249             }
250 268         568 return \@args;
251             }
252              
253              
254 8     8 1 53 sub cmp_operators { qw( = == != <> > >= < <= ), "\x{2260}", "\x{2264}", "\x{2265}" }
255 8     8 1 35 sub math_operators { qw( + - * / ) }
256 8     8 1 27 sub logic_operators { qw( and or not ! ) }
257 8     8 1 26 sub list_operators { ',', '(', ')' }
258             sub keyword_map {
259             return {
260 8     8 1 40 (map { $_ => $_ } cmp_operators, math_operators, logic_operators, list_operators),
  176         591  
261             '=' => '==', '<>' => '!=', "\x{2260}" => '!=',
262             "\x{2264}" => '<=', "\x{2265}" => '>='
263             }
264             }
265             sub scanner_rules {
266 8     8 1 25 my $self= shift;
267 8         30 my $keywords= $self->keyword_map;
268             my $kw_regex= join '|', map "\Q$_\E",
269 8         82 sort { length($b) <=> length($a) } # longest keywords get priority
  524         906  
270             keys %$keywords;
271            
272             # Perl 5.20.1 and 5.20.2 have a bug where regex comparisons on unicode strings can crash.
273             # It seems to damage the scalar $1, but copying it first fixes the problem.
274 8 50 33     95 my $kw_canonical= $] >= 5.020000 && $] < 5.020003? '$keywords->{lc(my $clone1= $1)}' : '$keywords->{lc $1}';
275             return (
276             # Pattern Name, Pattern, Token Type and Token Value
277 8         935 [ 'Whitespace', qr/(\s+)/, '"" => ""' ], # empty string causes next_token to loop
278             [ 'Decimal', qr/([0-9]*\.?[0-9]+(?:[eE][+-]?[0-9]+)?)\b/, 'Number => $1+0' ],
279             [ 'Hexadecimal', qr/0x([0-9A-Fa-f]+)/, 'Number => hex($1)' ],
280             [ 'Keywords', qr/($kw_regex)/, $kw_canonical.' => $1', { keywords => $keywords } ],
281             [ 'Identifiers', qr/([A-Za-z_][A-Za-z0-9_.]*)\b/, 'Identifier => $1' ],
282             # Single or double quoted string, using Pascal-style repeated quotes for escaping
283             [ 'StringLiteral', qr/(?:"((?:[^"]|"")*)"|'((?:[^']|'')*)')/, q%
284             do{
285             my $str= defined $1? $1 : $2;
286             $str =~ s/""/"/g if defined $1;
287             $str =~ s/''/'/g if defined $2;
288             (String => $str)
289             }
290             %],
291             );
292             }
293              
294             sub _build_scan_token_method_body {
295 8     8   31 my ($self, $rules)= @_;
296 8         240 return join('', map
297             ' return ' . $_->[2] . ' if $self->{input} =~ /\G' . $_->[1] . "/gc;\n",
298             @$rules
299             ).' return;' # return empty list of no rule matched
300             }
301              
302             sub _build_scan_token_method {
303 8     8   26 my ($pkg, $method_name)= @_;
304 8 50       43 $pkg= ref $pkg if ref $pkg;
305 8 50       54 $method_name= 'scan_token' unless defined $method_name;
306 8         50 my @rules= $pkg->scanner_rules;
307             # collect variables which should be available to the code
308 8 100       38 my %vars= map { $_->[3]? %{ $_->[3] } : () } @rules;
  48         133  
  8         38  
309 8         91 my $code= join "\n",
310             (map 'my $'.$_.' = $vars{'.$_.'};', keys %vars),
311             "sub ${pkg}::$method_name {",
312             ' my $self= shift;',
313             $pkg->_build_scan_token_method_body(\@rules),
314             "}\n";
315             # closure needed for 5.8 and 5.10 which complain about using a lexical
316             # in a sub declared at package scope.
317 9     9   27585 no warnings 'redefine','closure';
  9         86  
  9         10517  
318 8 50   2312 1 4258 eval "$code; 1" or die $@ . " for generated scanner code:\n".$code;
  2312 100       5385  
  2312 100       7079  
  2075 100       6637  
  1747 100       3784  
  1745 100       7202  
  821 100       3208  
  376 100       1256  
  87 100       314  
  87 100       267  
  87         246  
  87         337  
  289         762  
319 8         129 return $pkg->can('scan_token');
320             }
321              
322 2     2 1 13 sub scan_token { my $m= $_[0]->_build_scan_token_method; goto $m; };
  2         52  
323              
324              
325             sub Language::FormulaEngine::Parser::Node::Call::new {
326 13     13   31 my ($class, $name, $params)= @_;
327 13         52 bless [ $name, $params ], $class;
328             }
329 7     7   24 sub Language::FormulaEngine::Parser::Node::Call::is_constant { 0 }
330 414     414   1141 sub Language::FormulaEngine::Parser::Node::Call::function_name { $_[0][0] }
331 444     444   1635 sub Language::FormulaEngine::Parser::Node::Call::parameters { $_[0][1] }
332             sub Language::FormulaEngine::Parser::Node::Call::evaluate {
333 169     169   1222 my ($self, $namespace)= @_;
334 169         513 $namespace->evaluate_call($self);
335             }
336             sub Language::FormulaEngine::Parser::Node::Call::simplify {
337 15     15   48 my ($node, $namespace)= @_;
338 15         63 $namespace->simplify_call($node)
339             }
340             sub Language::FormulaEngine::Parser::Node::Call::deparse {
341 23     23   74 my ($node, $parser)= @_;
342             return $node->function_name . (
343 23         47 !@{$node->parameters}? '()'
344 23 100       48 : '( ' .join(', ', map $parser->deparse($_), @{$node->parameters}). ' )'
  21         39  
345             )
346             }
347              
348             sub new_call {
349 377     377 1 834 my ($self, $fn, $params)= @_;
350 377         1289 $self->functions->{$fn}++; # record dependency on this function
351 377         1491 bless [ $fn, $params ], 'Language::FormulaEngine::Parser::Node::Call';
352             }
353              
354              
355             sub Language::FormulaEngine::Parser::Node::symbol::new {
356 0     0   0 my ($class, $name)= @_;
357 0         0 bless \$name, $class;
358             }
359              
360 7     7   22 sub Language::FormulaEngine::Parser::Node::Symbol::is_constant { 0 }
361 121     121   196 sub Language::FormulaEngine::Parser::Node::Symbol::symbol_name { ${$_[0]} }
  121         451  
362             sub Language::FormulaEngine::Parser::Node::Symbol::evaluate {
363 66     66   139 my ($self, $namespace)= @_;
364 66         198 $namespace->get_value($$self);
365             }
366             sub Language::FormulaEngine::Parser::Node::Symbol::simplify {
367 15     15   55 my ($self, $namespace)= @_;
368 15         46 return $namespace->simplify_symref($self);
369             }
370             sub Language::FormulaEngine::Parser::Node::Symbol::deparse {
371 24     24   90 shift->symbol_name;
372             }
373              
374             sub new_symbol {
375 168     168 1 395 my ($self, $name)= @_;
376 168         548 $self->symbols->{$name}++; # record dependency on this variable
377 168         549 bless \$name, 'Language::FormulaEngine::Parser::Node::Symbol';
378             }
379              
380              
381             sub Language::FormulaEngine::Parser::Node::String::new {
382 0     0   0 my ($class, $value)= @_;
383 0         0 bless \$value, $class;
384             }
385              
386 1     1   4 sub Language::FormulaEngine::Parser::Node::String::is_constant { 1 }
387 62     62   93 sub Language::FormulaEngine::Parser::Node::String::string_value { ${$_[0]} }
  62         225  
388 52     52   88 sub Language::FormulaEngine::Parser::Node::String::evaluate { ${$_[0]} }
  52         231  
389 1     1   2 sub Language::FormulaEngine::Parser::Node::String::simplify { $_[0] }
390             sub _str_escape {
391 6     6   9 my $str= shift;
392 6         14 $str =~ s/'/''/g;
393 6         23 "'$str'";
394             }
395             sub Language::FormulaEngine::Parser::Node::String::deparse {
396 6     6   13 _str_escape(shift->string_value);
397             }
398              
399             sub new_string {
400 116     116 1 263 my ($self, $text)= @_;
401 116         381 bless \$text, 'Language::FormulaEngine::Parser::Node::String'
402             }
403              
404              
405             sub Language::FormulaEngine::Parser::Node::Number::new {
406 18     18   40 my ($class, $value)= @_;
407 18         32 $value= 0+$value;
408 18         83 bless \$value, $class;
409             }
410            
411 25     25   81 sub Language::FormulaEngine::Parser::Node::Number::is_constant { 1 }
412 219     219   349 sub Language::FormulaEngine::Parser::Node::Number::number_value { ${$_[0]} }
  219         1297  
413 159     159   253 sub Language::FormulaEngine::Parser::Node::Number::evaluate { ${$_[0]} }
  159         522  
414 6     6   13 sub Language::FormulaEngine::Parser::Node::Number::simplify { $_[0] }
415 18     18   39 sub Language::FormulaEngine::Parser::Node::Number::deparse { shift->number_value }
416              
417             sub new_number {
418 359     359 1 646 my $value= $_[1]+0;
419 359         1300 bless \$value, 'Language::FormulaEngine::Parser::Node::Number'
420             }
421              
422              
423             sub get_negative {
424 41     41 1 93 my ($self, $node)= @_;
425 41 100       218 return $self->new_number(-$node->number_value) if $node->can('number_value');
426 5 50 66     50 return $node->parameters->[0] if $node->can('function_name') and $node->function_name eq 'negative';
427 5         21 return $self->new_call('negative', [$node]);
428             }
429              
430             1;
431              
432             __END__
433              
434             =pod
435              
436             =encoding UTF-8
437              
438             =head1 NAME
439              
440             Language::FormulaEngine::Parser - Create parse tree from an input string
441              
442             =head1 VERSION
443              
444             version 0.08
445              
446             =head1 SYNOPSIS
447              
448             my $parse_tree= Language::FormulaEngine::Parser->new->parse($string);
449              
450             =head1 DESCRIPTION
451              
452             This class scans tokens from an input string and builds a parse tree. In compiler terminology,
453             it is both a Scanner and Parser. It performs a top-down recursive descent parse, because this
454             is easy and gives good error messages. It only parses strings, but leaves room for subclasses
455             to implement streaming. By default, the parser simply applies a Grammar to the input, without
456             checking whether the functions or variables exist, but can be subclassed to do more detailed
457             analysis during the parse.
458              
459             The generated parse tree is made up of Function nodes (each infix operator is converted to a
460             named function) and each Function node may contain Symbols, Strings, Numbers, and other
461             Function nodes. The parse tree can be passed to the Evaluator for instant execution, or passed
462             to the Compiler to generate an optimized perl coderef. The parse tree is lightweight, and does
463             not include token/context information; this could also be added by a subclass.
464              
465             =head1 PUBLIC API
466              
467             =head2 parse
468              
469             Parse a new input text, updating all derived attributes with the result of the operation.
470             It returns the value of L</parse_tree> (which is undef if the parse failed).
471             On failure, the exception is stored in L</error> and other attributes like L</token_pos> may
472             contain useful diagnostic information.
473              
474             =head2 parse_tree
475              
476             This holds the generated parse tree, or C<undef> if the parse failed. See L</"Parse Nodes">.
477              
478             =head2 error
479              
480             This is C<undef> if the parse succeeded, else an error message describing the syntax that ended
481             the parse.
482              
483             =head2 functions
484              
485             A set (hashref) of all function names encountered during the parse.
486              
487             =head2 symbols
488              
489             A set (hashref) of all non-function symbols encountered. (variables, constnts, etc.)
490              
491             =head2 reset
492              
493             Clear the results of the previous parse, to re-use the object. Returns C<$self> for chaining.
494              
495             =head2 deparse
496              
497             my $formula_text= $parser->deparse($tree);
498              
499             Return a canonical formula text for the parse tree, or a parse tree that you supply.
500              
501             =head1 EXTENSIBLE API
502              
503             These methods and attributes are documented for purposes of subclassing the parser.
504              
505             =head2 input
506              
507             The input string being scanned.
508             Code within the parser should access this as C<< $self->{input} >> for efficiency.
509              
510             =head2 input_pos
511              
512             Shortcut for C<< pos($self->{input}) >>.
513              
514             =head2 token_type
515              
516             Type of current token scanned from C<input>.
517             Code within the parser should access this as C<< $self->{token_type} >> for efficiency.
518              
519             =head2 token_value
520              
521             Value of current token scanned from C<input>, with escape sequences and etc resolved to a
522             sensible perl value.
523             Code within the parser should access this as C<< $self->{token_value} >> for efficiency.
524              
525             =head2 token_pos
526              
527             An offset within C<input> where this token started.
528             Code within the parser should access this as C<< $self->{token_pos} >> for efficiency.
529              
530             =head2 next_token
531              
532             Advance to the next token, replacing the values of C<token_> variables and updating
533             C<input_pos>. Returns the token_type, of which all are true except EOF which has a
534             type of C<0>, so this also means the function returns true if it parsed a token and
535             false if it reached EOF. It dies if no token could be parsed.
536             If you call next_token again after the eof token, it throws an exception.
537              
538             This method is a wrapper around L</scan_token>. Override that method to add new token types.
539              
540             =head2 scan_token
541              
542             Pattern-match the next token, and either return C<< $type => $value >> or an empty list if
543             the syntax is invalid. This is intended to be overridden by subclasses.
544              
545             =head2 consume_token
546              
547             return $self->consume_token if $self->{token_type} eq $desired_type;
548              
549             This is a shorthand for returning the current C<token_value> while also calling C<next_token>.
550              
551             =head2 token_context
552              
553             my $text= $self->token_context(%options);
554              
555             Default behavior generates a string like:
556              
557             "'blah blah' on line 15, char 12"
558              
559             Passing C<< token_context(multiline => 1) >> generates a string like
560              
561             "Expected something else at line 15, char 16\n" .
562             "blah blah blah token blah blah\n" .
563             " ^^^^^\n"
564              
565             Multiline additionally takes arguments as described in
566             L<Language::FormulaEngine::Parser::ContextUtil/format_context_multiline>.
567              
568             =head1 GRAMMAR
569              
570             =head2 Parse Rules
571              
572             The default grammar implements the following rules:
573              
574             expr ::= or_expr
575             or_expr ::= and_expr ( 'or' and_expr )*
576             and_expr ::= not_expr ( 'and' not_expr )*
577             not_expr ::= ( 'not' | '!' ) cmp_expr | cmp_expr
578             cmp_expr ::= sum_expr ( ( '=' | '==' | '<>' | '\u2260' | '<' | '<=' | '>' | '>=' ) sum_expr )*
579             sum_expr ::= prod_expr ( ('+' | '-') prod_expr )*
580             prod_expr ::= ( unit_expr ('*' | '/') )* unit_expr
581             unit_expr ::= '-' unit_expr | Identifier '(' list ')' | '(' (expr|list) ')' | Identifier | Number | String
582             list ::= expr ( ',' expr )* ','?
583              
584             C<ident>, C<num>, C<str>, and all the punctuation symbols are tokens.
585              
586             The parser uses a Recursive Descent algorithm implemented as the following method calls.
587             Each method consumes tokens from C<< $self >> and return a L</"PARSE NODES">:
588              
589             =over
590              
591             =item parse_expr
592              
593             =item parse_or_expr
594              
595             =item parse_and_expr
596              
597             =item parse_not_expr
598              
599             =item parse_cmp_expr
600              
601             =item parse_sum_expr
602              
603             =item parse_prod_expr
604              
605             =item parse_unit_expr
606              
607             =item parse_list
608              
609             =back
610              
611             =head2 Token Types
612              
613             =over
614              
615             =item C<'Number'>
616              
617             All the common decimal representations of integers and floating point numbers
618             which perl can parse. Optional decimals and decimal point followed by decimals
619             and optional exponent, ending at either the end of the input or a non-alphanumeric.
620              
621             =item C<'String'>
622              
623             A single-quoted or double-quoted string, treating a double occurrence of the quote
624             character to mean a literal quote character. ("Pascal style")
625              
626             'apostrophes are''nt hard'
627              
628             There are no escape sequences though, so to get control characters or awkward unicode
629             into a string you need something like:
630              
631             concat("smile ",char(0x263A))
632              
633             which depends on those functions being available in the namespace.
634              
635             =item Keywords...
636              
637             Keywords include the "word" tokens like 'OR', but also every text literal seen in a parse rule
638             such as operators and punctuation.
639             The C<token_type> of the keyword is the canonical version of the keyword, and the C<token_value>
640             is the actual text that was captured. The pattern matches the longest keyword possible.
641              
642             =item C<'Identifier'>
643              
644             Any alpha (or underscore) followed by any run of alphanumerics,
645             (including underscore and period).
646              
647             =back
648              
649             =head2 Customizing the Token Scanner
650              
651             The tokens are parsed using a series of regex tests. The regexes and the code that handles a
652             match of that regex are found in package attribute L</scanner_rules>. These regexes and code
653             fragments get lazily compiled into a package method on the first use (per package).
654             Meanwhile, several of those regex are built from other package attributes.
655              
656             =over
657              
658             =item scanner_rules
659              
660             This package method returns a list (not arrayref) of ordered elements of the form
661             C<< [ $name, $regex, $code_fragment, \%vars ] >>. You can subclass this method to inspect
662             the rules (probably based on C<$name>) and replace the regexes, or alter the handler code,
663             or add/remove your own rules. The regexes are attempted in the order they appear in this
664             list. You do not need to use "\G" or "/gc" on these regexes because those are added
665             automatically during compilation.
666              
667             =item keyword_map
668              
669             This package method returns a hashref of all known keywords, mapped to their canonical form.
670             So for instance, a key of C<< '<>' >> with a value of C<< '!=' >>. These tokens automatically
671             become the scanner rule named C<Keywords>. In turn, the contents of this hashref include
672             the L</cmp_operators>, L</math_operators>, L</logic_operators>, and L</list_operators> which
673             can be overridden separately.
674              
675             This method is called once during the compilation of L</scan_token>, and the result is then
676             made into a constant and referenced by the compiled method, so dynamic changes to the output
677             of this method will be ignored.
678              
679             =item cmp_operators
680              
681             Package method that returns a list of comparison operators, like '<', '>=', etc.
682              
683             =item math_operators
684              
685             Package method that returns a list of math operators, like '*', '+', etc.
686              
687             =item logic_operators
688              
689             Package method that returns a list of keywords like 'and', 'or', etc.
690              
691             =item list_operators
692              
693             Package method that returns a list of '(', ')', ','
694              
695             =back
696              
697             =head2 Parse Nodes
698              
699             The parse tree takes a minimalist approach to node classification. In this default
700             implementation, number values, string values, and symbolic references have just a simple
701             wrapper around the value, and function calls are just a pair of function name and list of
702             arguments. All language operators are represented as function calls.
703              
704             A blessed node only needs to support one method: C<< ->evaluate($namespace) >>.
705              
706             The class name of the blessed nodes should be ignored. A function is anything which
707             C<< can("function_name") >>, a string is anything which C<< can("string_value") >>, a number is
708             anything which C<< can("number_value") >> and a symbolic reference is anything which
709             C<< can("symbolic_name") >>.
710              
711             Subclasses of Parser should implemnt new node types as needed. You probable also need to
712             update L</deparse>.
713              
714             The parser rules (C<parse_X_expr> methods) create nodes by the following methods on the Parser
715             class, so that you can easily subclass C<Parser> and override which class of node is getting
716             created.
717              
718             =over
719              
720             =item new_call
721              
722             $node= $parser->new_call( $function_name, $parameters );
723              
724             Generate a node for a function call. The returned node has attributes C<function_name>
725             and C<parameters>
726              
727             =item new_symbol
728              
729             $node= $parser->new_symbol($symbol_name);
730              
731             A reference to a symbolic value (i.e. variable or constant).
732             It has one attribute C<symbol_name>.
733              
734             =item new_string
735              
736             $node= $parser->new_string($string_value);
737              
738             A string literal. It has an attribute C<string_value> holding the raw value.
739              
740             =item new_number
741              
742             $plain_scalar= $parser->new_number($value);
743              
744             A numeric constant. It has an attribute C<number_value> holding the raw value.
745              
746             =item get_negative
747              
748             $negative_node= $parser->get_negative( $node );
749              
750             Utility method to get the "opposite of" a parse node. By default, this wraps it with the
751             function C<'negative'>, unless it already was that function then it unwraps the parameter.
752             It performs simple negation on numbers.
753              
754             =back
755              
756             =head1 AUTHOR
757              
758             Michael Conrad <mconrad@intellitree.com>
759              
760             =head1 COPYRIGHT AND LICENSE
761              
762             This software is copyright (c) 2023 by Michael Conrad, IntelliTree Solutions llc.
763              
764             This is free software; you can redistribute it and/or modify it under
765             the same terms as the Perl 5 programming language system itself.
766              
767             =cut