File Coverage

blib/lib/Language/FormulaEngine/Parser.pm
Criterion Covered Total %
statement 226 234 96.5
branch 76 88 86.3
condition 27 35 77.1
subroutine 56 58 96.5
pod 33 33 100.0
total 418 448 93.3


line stmt bran cond sub pod time code
1             package Language::FormulaEngine::Parser;
2 7     7   456462 use Moo;
  7         24126  
  7         55  
3 7     7   4897 use Carp;
  7         38  
  7         456  
4 7     7   1090 use Try::Tiny;
  7         2713  
  7         442  
5 7     7   52 use List::Util qw( min max );
  7         26  
  7         549  
6             use Language::FormulaEngine::Parser::ContextUtil
7 7     7   3559 qw( calc_text_coordinates format_context_string format_context_multiline );
  7         22  
  7         474  
8 7     7   1598 use namespace::clean;
  7         30123  
  7         67  
9              
10             # ABSTRACT: Create parse tree from an input string
11             our $VERSION = '0.06'; # VERSION
12              
13              
14             has parse_tree => ( is => 'rw' );
15             has error => ( is => 'rw' );
16             has functions => ( is => 'rw' );
17             has symbols => ( is => 'rw' );
18              
19             sub parse {
20 273     273 1 46962 my ($self, $input)= @_;
21 273         819 $self->reset;
22 273         560 $self->{input}= $input;
23 273         834 pos( $self->{input} )= 0;
24             try {
25 273     273   11808 $self->next_token;
26 272         724 my $tree= $self->parse_expr;
27             # It is an error if there was un-processed input.
28 271 100       610 $self->token_type eq '0'
29             or die sprintf('Unexpected %s "%s" near %s',
30             $self->token_type, $self->token_value, $self->token_context);
31 270         944 $self->parse_tree($tree);
32             } catch {
33 3     3   48 chomp;
34 3         17 $self->error($_);
35 273         1874 };
36 273         5239 return $self->parse_tree;
37             }
38              
39             sub reset {
40 273     273 1 478 my $self= shift;
41 273         1262 $self->parse_tree(undef);
42 273         673 $self->error(undef);
43 273         766 $self->functions({});
44 273         664 $self->symbols({});
45 273         485 delete @{$self}{'input','token_type','token_value','token_pos'};
  273         1010  
46 273         486 $self;
47             }
48              
49              
50             sub deparse {
51 39     39 1 3641 my ($self, $node)= @_;
52 39 100       92 $node= $self->parse_tree unless @_ > 1;
53 39         83 $node->deparse($self);
54             }
55              
56              
57 0     0 1 0 sub input { shift->{input} }
58 0     0 1 0 sub input_pos { pos( shift->{input} ) }
59 328     328 1 1110 sub token_type { shift->{token_type} }
60 47     47 1 213 sub token_value { shift->{token_value} }
61 46     46 1 108 sub token_pos { shift->{token_pos} }
62              
63              
64             sub next_token {
65 2015     2015 1 49032 my $self= shift;
66            
67             # If already reached end of input, throw an exception.
68             die "Can't call next_token after end of input"
69 2015 50 100     5664 if '0' eq ($self->{token_type}||'');
70            
71             # Detect the next token
72 2015         4440 my ($type, $val, $pos0, $pos1)= ('','');
73 2015         4218 while ($type eq '') {
74 2222   100     4779 $pos0= pos($self->{input}) || 0;
75 2222         55304 ($type, $val)= $self->scan_token;
76 2222   100     11609 $pos1= pos($self->{input}) || 0;
77             # Check for end of buffer, even if it matched.
78 2222 100       4743 if ($pos1 >= length $self->{input}) {
79             #pos($self->{input})= $pos0; # rewind to start of token before growing buffer
80             #if ($self->_grow_buffer) {
81             # $log->trace("grow buffer succeeded");
82             # $type= '';
83             # next;
84             #}
85             #pos($self->{input})= $pos1; # restore actual position\
86             # If we didn't get a token or are ignoring this final token, then return the EOF token
87 557 100 100     1802 if (!defined $type || $type eq '') {
88 281         468 $type= 0;
89 281         445 $val= '';
90 281         411 $pos0= $pos1;
91 281         504 last;
92             }
93             }
94 1941 100       3812 defined $type
95             or die "Unknown syntax at ".$self->token_context."\n";
96 1940 50       4835 $pos1 > $pos0
97             or croak "Tokenizer consumed zero characters";
98             }
99 2014         3234 @{$self}{'token_type','token_value','token_pos'}= ($type,$val,$pos0);
  2014         4561  
100 2014         3767 return $type, $val;
101             }
102              
103              
104             sub consume_token {
105 937     937 1 18832 my $self= shift;
106             croak "Can't consume EOF"
107 937 100       3093 if $self->{token_type} eq '0';
108 927         1879 my $val= $self->{token_value};
109 927         2072 $self->next_token;
110 927         2302 return $val;
111             }
112              
113             sub token_context {
114 2     2 1 6 my ($self, %args)= @_;
115             return format_context_multiline($self->{input}, $self->{token_pos}||0, pos($self->{input})||0, \%args)
116 2 50 0     6 if delete $args{multiline};
      0        
117 2   100     20 return format_context_string($self->{input}, $self->{token_pos}||0, pos($self->{input})||0);
      100        
118             }
119              
120              
121 723     723 1 1477 sub parse_expr { shift->parse_or_expr; }
122              
123             sub parse_or_expr {
124 723     723 1 1113 my $self= shift;
125 723         1396 my $first= $self->parse_and_expr;
126 722 50       2054 return $first unless $self->{token_type} eq 'or';
127 0         0 my @or_expr= $first;
128 0         0 while ($self->{token_type} eq 'or') {
129 0         0 $self->next_token;
130 0         0 push @or_expr, $self->parse_and_expr;
131             }
132 0         0 return $self->new_call('or', \@or_expr);
133             }
134              
135             sub parse_and_expr {
136 723     723 1 1132 my $self= shift;
137 723         1485 my $first= $self->parse_not_expr;
138 722 100       1839 return $first unless $self->{token_type} eq 'and';
139 7         19 my @and_expr= $first;
140 7         35 while ($self->{token_type} eq 'and') {
141 7         21 $self->next_token;
142 7         18 push @and_expr, $self->parse_not_expr;
143             }
144 7         20 return $self->new_call('and', \@and_expr);
145             }
146              
147             sub parse_not_expr {
148 730     730 1 1120 my $self= shift;
149 730 100 66     2886 if ($self->{token_type} eq 'not' or $self->{token_type} eq '!') {
150 5         14 $self->next_token;
151 5         15 return $self->new_call('not', [ $self->parse_cmp_expr ]);
152             }
153 725         1511 return $self->parse_cmp_expr;
154             }
155              
156             my %_cmp_ops= map { $_ => 1 } qw( > < >= <= != == );
157             sub parse_cmp_expr {
158 730     730 1 1064 my $self= shift;
159 730         1485 my $first= $self->parse_sum_expr;
160 729 100       2066 return $first unless $_cmp_ops{$self->{token_type}};
161 23         88 my @expr= $first;
162 23         68 while ($_cmp_ops{$self->{token_type}}) {
163 31         76 push @expr, $self->new_string($self->{token_type});
164 31         86 $self->next_token;
165 31         77 push @expr, $self->parse_sum_expr;
166             }
167 23         69 return $self->new_call('compare', \@expr);
168             }
169              
170             sub parse_sum_expr {
171 761     761 1 1203 my $self= shift;
172 761         1508 my $first= $self->parse_prod_expr;
173 760 100 100     2722 return $first unless $self->{token_type} eq '+' or $self->{token_type} eq '-';
174 24         63 my @sum_expr= $first;
175 24   100     80 while ($self->{token_type} eq '+' or $self->{token_type} eq '-') {
176 31         69 my $negate= $self->consume_token eq '-';
177 31         82 my $operand= $self->parse_prod_expr;
178 31 100       166 push @sum_expr, $negate? $self->get_negative($operand) : $operand;
179             }
180 24         67 return $self->new_call('sum', \@sum_expr);
181             }
182              
183             sub parse_prod_expr {
184 792     792 1 1224 my $self= shift;
185 792         1472 my $value= $self->parse_unit_expr;
186 791   100     3380 while ($self->{token_type} eq '*' or $self->{token_type} eq '/') {
187 35         79 my $op= $self->consume_token;
188 35         87 my $right= $self->parse_unit_expr;
189 35 100       164 $value= $self->new_call( $op eq '*'? 'mul' : 'div', [ $value, $right ] );
190             }
191 791         1347 return $value;
192             }
193              
194             sub parse_unit_expr {
195 857     857 1 1271 my $self= shift;
196 857         1236 my $negate= 0;
197 857         1170 my $expr;
198              
199 857 100       1820 if ($self->{token_type} eq '-') {
200 30         78 $self->next_token;
201 30         87 return $self->get_negative($self->parse_unit_expr);
202             }
203              
204 827 100       1616 if ($self->{token_type} eq '(') {
205 11         32 $self->next_token;
206 11         38 my $args= $self->parse_list;
207             die "Expected ')' near ".$self->token_context."\n"
208 11 50       32 if $self->{token_type} ne ')';
209 11         32 $self->next_token;
210 11 100       44 return @$args > 1? $self->new_call('list', $args) : $args->[0];
211             }
212            
213 816 100       1661 if ($self->{token_type} eq 'Number') {
214 317         652 return $self->new_number($self->consume_token);
215             }
216            
217 499 100       968 if ($self->{token_type} eq 'String') {
218 84         179 return $self->new_string($self->consume_token);
219             }
220            
221 415 100       883 if ($self->{token_type} eq 'Identifier') {
222 414         809 my $id= $self->consume_token;
223 414 100       998 if ($self->{token_type} eq '(') {
224 261         684 $self->next_token;
225 261 100       848 my $args= $self->{token_type} eq ')'? [] : $self->parse_list;
226             die "Expected ')' near ".$self->token_context."\n"
227 261 50       593 if $self->{token_type} ne ')';
228 261         640 $self->next_token;
229 261         676 return $self->new_call($id, $args);
230             }
231             else {
232 153         392 return $self->new_symbol($id);
233             }
234             }
235            
236 1 50       11 if ($self->{token_type} eq '0') {
237 1         9 die "Expected expression component near (end of input)";
238             }
239            
240 0         0 die "Unexpected token $self->{token_type} '$self->{token_value}' near ".$self->token_context."\n";
241             }
242              
243             sub parse_list {
244 263     263 1 447 my $self= shift;
245 263         621 my @args= $self->parse_expr;
246 263         659 while ($self->{token_type} eq ',') {
247 188         511 $self->next_token;
248 188         482 push @args, $self->parse_expr;
249             }
250 263         595 return \@args;
251             }
252              
253              
254 6     6 1 35 sub cmp_operators { qw( = == != <> > >= < <= ), "\x{2260}", "\x{2264}", "\x{2265}" }
255 6     6 1 82 sub math_operators { qw( + - * / ) }
256 6     6 1 22 sub logic_operators { qw( and or not ! ) }
257 6     6 1 19 sub list_operators { ',', '(', ')' }
258             sub keyword_map {
259             return {
260 6     6 1 31 (map { $_ => $_ } cmp_operators, math_operators, logic_operators, list_operators),
  132         353  
261             '=' => '==', '<>' => '!=', "\x{2260}" => '!=',
262             "\x{2264}" => '<=', "\x{2265}" => '>='
263             }
264             }
265             sub scanner_rules {
266 6     6 1 15 my $self= shift;
267 6         16 my $keywords= $self->keyword_map;
268             my $kw_regex= join '|', map "\Q$_\E",
269 6         70 sort { length($b) <=> length($a) } # longest keywords get priority
  389         623  
270             keys %$keywords;
271            
272             # Perl 5.20.1 and 5.20.2 have a bug where regex comparisons on unicode strings can crash.
273             # It seems to damage the scalar $1, but copying it first fixes the problem.
274 6 50 33     83 my $kw_canonical= $] >= 5.020000 && $] < 5.020003? '$keywords->{lc(my $clone1= $1)}' : '$keywords->{lc $1}';
275             return (
276             # Pattern Name, Pattern, Token Type and Token Value
277 6         629 [ 'Whitespace', qr/(\s+)/, '"" => ""' ], # empty string causes next_token to loop
278             [ 'Decimal', qr/([0-9]*\.?[0-9]+(?:[eE][+-]?[0-9]+)?)\b/, 'Number => $1+0' ],
279             [ 'Hexadecimal', qr/0x([0-9A-Fa-f]+)/, 'Number => hex($1)' ],
280             [ 'Keywords', qr/($kw_regex)/, $kw_canonical.' => $1', { keywords => $keywords } ],
281             [ 'Identifiers', qr/([A-Za-z_][A-Za-z0-9_.]*)\b/, 'Identifier => $1' ],
282             # Single or double quoted string, using Pascal-style repeated quotes for escaping
283             [ 'StringLiteral', qr/(?:"((?:[^"]|"")*)"|'((?:[^']|'')*)')/, q%
284             do{
285             my $str= defined $1? $1 : $2;
286             $str =~ s/""/"/g if defined $1;
287             $str =~ s/''/'/g if defined $2;
288             (String => $str)
289             }
290             %],
291             );
292             }
293              
294             sub _build_scan_token_method_body {
295 6     6   21 my ($self, $rules)= @_;
296 6         153 return join('', map
297             ' return ' . $_->[2] . ' if $self->{input} =~ /\G' . $_->[1] . "/gc;\n",
298             @$rules
299             ).' return;' # return empty list of no rule matched
300             }
301              
302             sub _build_scan_token_method {
303 6     6   15 my ($pkg, $method_name)= @_;
304 6 50       39 $pkg= ref $pkg if ref $pkg;
305 6 50       25 $method_name= 'scan_token' unless defined $method_name;
306 6         25 my @rules= $pkg->scanner_rules;
307             # collect variables which should be available to the code
308 6 100       32 my %vars= map { $_->[3]? %{ $_->[3] } : () } @rules;
  36         91  
  6         24  
309 6         57 my $code= join "\n",
310             (map 'my $'.$_.' = $vars{'.$_.'};', keys %vars),
311             "sub ${pkg}::$method_name {",
312             ' my $self= shift;',
313             $pkg->_build_scan_token_method_body(\@rules),
314             "}\n";
315             # closure needed for 5.8 and 5.10 which complain about using a lexical
316             # in a sub declared at package scope.
317 7     7   20860 no warnings 'redefine','closure';
  7         19  
  7         6114  
318 6 50   2222 1 3318 eval "$code; 1" or die $@ . " for generated scanner code:\n".$code;
  2222 100       5184  
  2222 100       7152  
  2012 100       6823  
  1690 100       3757  
  1688 100       7164  
  791 100       3108  
  366 100       1214  
  87 100       332  
  87 100       292  
  87         271  
  87         358  
  279         739  
319 6         81 return $pkg->can('scan_token');
320             }
321              
322 2     2 1 10 sub scan_token { my $m= $_[0]->_build_scan_token_method; goto $m; };
  2         52  
323              
324              
325 361     361   956 sub Language::FormulaEngine::Parser::Node::Call::function_name { $_[0][0] }
326 379     379   1400 sub Language::FormulaEngine::Parser::Node::Call::parameters { $_[0][1] }
327             sub Language::FormulaEngine::Parser::Node::Call::evaluate {
328 164     164   1237 my ($self, $namespace)= @_;
329 164         535 $namespace->evaluate_call($self);
330             }
331             sub Language::FormulaEngine::Parser::Node::Call::deparse {
332 12     12   20 my ($node, $parser)= @_;
333             return $node->function_name . (
334 12         23 !@{$node->parameters}? '()'
335 12 100       28 : '( ' .join(', ', map $parser->deparse($_), @{$node->parameters}). ' )'
  11         22  
336             )
337             }
338              
339             sub new_call {
340 362     362 1 1129 my ($self, $fn, $params)= @_;
341 362         1105 $self->functions->{$fn}++; # record dependency on this function
342 362         1501 bless [ $fn, $params ], 'Language::FormulaEngine::Parser::Node::Call';
343             }
344              
345              
346 90     90   158 sub Language::FormulaEngine::Parser::Node::Symbol::symbol_name { ${$_[0]} }
  90         288  
347             sub Language::FormulaEngine::Parser::Node::Symbol::evaluate {
348 62     62   122 my ($self, $namespace)= @_;
349 62         170 $namespace->get_value($$self);
350             }
351             sub Language::FormulaEngine::Parser::Node::Symbol::deparse {
352 14     14   39 shift->symbol_name;
353             }
354              
355             sub new_symbol {
356 153     153 1 319 my ($self, $name)= @_;
357 153         502 $self->symbols->{$name}++; # record dependency on this variable
358 153         474 bless \$name, 'Language::FormulaEngine::Parser::Node::Symbol';
359             }
360              
361              
362 62     62   124 sub Language::FormulaEngine::Parser::Node::String::string_value { ${$_[0]} }
  62         235  
363 51     51   91 sub Language::FormulaEngine::Parser::Node::String::evaluate { ${$_[0]} }
  51         196  
364             sub _str_escape {
365 6     6   11 my $str= shift;
366 6         13 $str =~ s/'/''/g;
367 6         23 "'$str'";
368             }
369             sub Language::FormulaEngine::Parser::Node::String::deparse {
370 6     6   14 _str_escape(shift->string_value);
371             }
372              
373             sub new_string {
374 115     115 1 320 my ($self, $text)= @_;
375 115         382 bless \$text, 'Language::FormulaEngine::Parser::Node::String'
376             }
377              
378              
379 204     204   382 sub Language::FormulaEngine::Parser::Node::Number::number_value { ${$_[0]} }
  204         1224  
380 140     140   230 sub Language::FormulaEngine::Parser::Node::Number::evaluate { ${$_[0]} }
  140         551  
381 7     7   14 sub Language::FormulaEngine::Parser::Node::Number::deparse { shift->number_value }
382              
383             sub new_number {
384 352     352 1 699 my $value= $_[1]+0;
385 352         1061 bless \$value, 'Language::FormulaEngine::Parser::Node::Number'
386             }
387              
388              
389             sub get_negative {
390 40     40 1 79 my ($self, $node)= @_;
391 40 100       209 return $self->new_number(-$node->number_value) if $node->can('number_value');
392 5 50 66     25 return $node->parameters->[0] if $node->can('function_name') and $node->function_name eq 'negative';
393 5         17 return $self->new_call('negative', [$node]);
394             }
395              
396             1;
397              
398             __END__
399              
400             =pod
401              
402             =encoding UTF-8
403              
404             =head1 NAME
405              
406             Language::FormulaEngine::Parser - Create parse tree from an input string
407              
408             =head1 VERSION
409              
410             version 0.06
411              
412             =head1 SYNOPSIS
413              
414             my $parse_tree= Language::FormulaEngine::Parser->new->parse($string);
415              
416             =head1 DESCRIPTION
417              
418             This class scans tokens from an input string and builds a parse tree. In compiler terminology,
419             it is both a Scanner and Parser. It performs a top-down recursive descent parse, because this
420             is easy and gives good error messages. It only parses strings, but leaves room for subclasses
421             to implement streaming. By default, the parser simply applies a Grammar to the input, without
422             checking whether the functions or variables exist, but can be subclassed to do more detailed
423             analysis during the parse.
424              
425             The generated parse tree is made up of Function nodes (each infix operator is converted to a
426             named function) and each Function node may contain Symbols, Strings, Numbers, and other
427             Function nodes. The parse tree can be passed to the Evaluator for instant execution, or passed
428             to the Compiler to generate an optimized perl coderef. The parse tree is lightweight, and does
429             not include token/context information; this could also be added by a subclass.
430              
431             =head1 PUBLIC API
432              
433             =head2 parse
434              
435             Parse a new input text, updating all derived attributes with the result of the operation.
436             It returns the value of L</parse_tree> (which is undef if the parse failed).
437             On failure, the exception is stored in L</error> and other attributes like L</token_pos> may
438             contain useful diagnostic information.
439              
440             =head2 parse_tree
441              
442             This holds the generated parse tree, or C<undef> if the parse failed. See L</"Parse Nodes">.
443              
444             =head2 error
445              
446             This is C<undef> if the parse succeeded, else an error message describing the syntax that ended
447             the parse.
448              
449             =head2 functions
450              
451             A set (hashref) of all function names encountered during the parse.
452              
453             =head2 symbols
454              
455             A set (hashref) of all non-function symbols encountered. (variables, constants, etc.)
456              
457             =head2 reset
458              
459             Clear the results of the previous parse, to re-use the object. Returns C<$self> for chaining.
460              
461             =head2 deparse
462              
463             my $formula_text= $parser->deparse($tree);
464              
465             Return a canonical formula text for the parse tree, or a parse tree that you supply.
466              
467             =head1 EXTENSIBLE API
468              
469             These methods and attributes are documented for purposes of subclassing the parser.
470              
471             =head2 input
472              
473             The input string being scanned.
474             Code within the parser should access this as C<< $self->{input} >> for efficiency.
475              
476             =head2 input_pos
477              
478             Shortcut for C<< pos($self->{input}) >>.
479              
480             =head2 token_type
481              
482             Type of current token scanned from C<input>.
483             Code within the parser should access this as C<< $self->{token_type} >> for efficiency.
484              
485             =head2 token_value
486              
487             Value of current token scanned from C<input>, with escape sequences and etc resolved to a
488             sensible perl value.
489             Code within the parser should access this as C<< $self->{token_value} >> for efficiency.
490              
491             =head2 token_pos
492              
493             An offset within C<input> where this token started.
494             Code within the parser should access this as C<< $self->{token_pos} >> for efficiency.
495              
496             =head2 next_token
497              
498             Advance to the next token, replacing the values of C<token_> variables and updating
499             C<input_pos>. Returns the token_type, of which all are true except EOF which has a
500             type of C<0>, so this also means the function returns true if it parsed a token and
501             false if it reached EOF. It dies if no token could be parsed.
502             If you call next_token again after the eof token, it throws an exception.
503              
504             This method is a wrapper around L</scan_token>. Override that method to add new token types.
505              
506             =head2 scan_token
507              
508             Pattern-match the next token, and either return C<< $type => $value >> or an empty list if
509             the syntax is invalid. This is intended to be overridden by subclasses.
510              
511             =head2 consume_token
512              
513             return $self->consume_token if $self->{token_type} eq $desired_type;
514              
515             This is a shorthand for returning the current C<token_value> while also calling C<next_token>.
516              
517             =head2 token_context
518              
519             my $text= $self->token_context(%options);
520              
521             Default behavior generates a string like:
522              
523             "'blah blah' on line 15, char 12"
524              
525             Passing C<< token_context(multiline => 1) >> generates a string like
526              
527             "Expected something else at line 15, char 16\n" .
528             "blah blah blah token blah blah\n" .
529             " ^^^^^\n"
530              
531             Multiline additionally takes arguments as described in
532             L<Language::FormulaEngine::Parser::ContextUtil/format_context_multiline>.
533              
534             =head1 GRAMMAR
535              
536             =head2 Parse Rules
537              
538             The default grammar implements the following rules:
539              
540             expr ::= or_expr
541             or_expr ::= and_expr ( 'or' and_expr )*
542             and_expr ::= not_expr ( 'and' not_expr )*
543             not_expr ::= ( 'not' | '!' ) cmp_expr | cmp_expr
544             cmp_expr ::= sum_expr ( ( '=' | '==' | '!=' | '<>' | '\u2260' | '<' | '<=' | '\u2264' | '>' | '>=' | '\u2265' ) sum_expr )*
545             sum_expr ::= prod_expr ( ('+' | '-') prod_expr )*
546             prod_expr ::= ( unit_expr ('*' | '/') )* unit_expr
547             unit_expr ::= '-' unit_expr | Identifier '(' list ')' | '(' (expr|list) ')' | Identifier | Number | String
548             list ::= expr ( ',' expr )*
549              
550             C<Identifier>, C<Number>, C<String>, and all the punctuation symbols are tokens.
551              
552             The parser uses a Recursive Descent algorithm implemented as the following method calls.
553             Each method consumes tokens from C<< $self >> and returns a L<parse node|/"Parse Nodes">:
554              
555             =over
556              
557             =item parse_expr
558              
559             =item parse_or_expr
560              
561             =item parse_and_expr
562              
563             =item parse_not_expr
564              
565             =item parse_cmp_expr
566              
567             =item parse_sum_expr
568              
569             =item parse_prod_expr
570              
571             =item parse_unit_expr
572              
573             =item parse_list
574              
575             =back
576              
577             =head2 Token Types
578              
579             =over
580              
581             =item C<'Number'>
582              
583             All the common decimal representations of integers and floating point numbers
584             which perl can parse. Optional decimals and decimal point followed by decimals
585             and optional exponent, ending at either the end of the input or a non-alphanumeric.
586              
587             =item C<'String'>
588              
589             A single-quoted or double-quoted string, treating a double occurrence of the quote
590             character to mean a literal quote character. ("Pascal style")
591              
592             'apostrophes are''nt hard'
593              
594             There are no escape sequences though, so to get control characters or awkward unicode
595             into a string you need something like:
596              
597             concat("smile ",char(0x263A))
598              
599             which depends on those functions being available in the namespace.
600              
601             =item Keywords...
602              
603             Keywords include the "word" tokens like 'OR', but also every text literal seen in a parse rule
604             such as operators and punctuation.
605             The C<token_type> of the keyword is the canonical version of the keyword, and the C<token_value>
606             is the actual text that was captured. The pattern matches the longest keyword possible.
607              
608             =item C<'Identifier'>
609              
610             Any alpha (or underscore) followed by any run of alphanumerics,
611             (including underscore and period).
612              
613             =back
614              
615             =head2 Customizing the Token Scanner
616              
617             The tokens are parsed using a series of regex tests. The regexes and the code that handles a
618             match of that regex are found in package attribute L</scanner_rules>. These regexes and code
619             fragments get lazily compiled into a package method on the first use (per package).
620             Meanwhile, several of those regex are built from other package attributes.
621              
622             =over
623              
624             =item scanner_rules
625              
626             This package method returns a list (not arrayref) of ordered elements of the form
627             C<< [ $name, $regex, $code_fragment, \%vars ] >>. You can subclass this method to inspect
628             the rules (probably based on C<$name>) and replace the regexes, or alter the handler code,
629             or add/remove your own rules. The regexes are attempted in the order they appear in this
630             list. You do not need to use "\G" or "/gc" on these regexes because those are added
631             automatically during compilation.
632              
633             =item keyword_map
634              
635             This package method returns a hashref of all known keywords, mapped to their canonical form.
636             So for instance, a key of C<< '<>' >> with a value of C<< '!=' >>. These tokens automatically
637             become the scanner rule named C<Keywords>. In turn, the contents of this hashref include
638             the L</cmp_operators>, L</math_operators>, L</logic_operators>, and L</list_operators> which
639             can be overridden separately.
640              
641             This method is called once during the compilation of L</scan_token>, and the result is then
642             made into a constant and referenced by the compiled method, so dynamic changes to the output
643             of this method will be ignored.
644              
645             =item cmp_operators
646              
647             Package method that returns a list of comparison operators, like '<', '>=', etc.
648              
649             =item math_operators
650              
651             Package method that returns a list of math operators, like '*', '+', etc.
652              
653             =item logic_operators
654              
655             Package method that returns a list of keywords like 'and', 'or', etc.
656              
657             =item list_operators
658              
659             Package method that returns a list of '(', ')', ','
660              
661             =back
662              
663             =head2 Parse Nodes
664              
665             The parse tree takes a minimalist approach to node classification. In this default
666             implementation, number values, string values, and symbolic references have just a simple
667             wrapper around the value, and function calls are just a pair of function name and list of
668             arguments. All language operators are represented as function calls.
669              
670             A blessed node only needs to support one method: C<< ->evaluate($namespace) >>.
671              
672             The class name of the blessed nodes should be ignored. A function is anything which
673             C<< can("function_name") >>, a string is anything which C<< can("string_value") >>, a number is
674             anything which C<< can("number_value") >> and a symbolic reference is anything which
675             C<< can("symbol_name") >>.
676              
677             Subclasses of Parser should implement new node types as needed. You probably also need to
678             update L</deparse>.
679              
680             The parser rules (C<parse_X_expr> methods) create nodes by the following methods on the Parser
681             class, so that you can easily subclass C<Parser> and override which class of node is getting
682             created.
683              
684             =over
685              
686             =item new_call
687              
688             $node= $parser->new_call( $function_name, $parameters );
689              
690             Generate a node for a function call. The returned node has attributes C<function_name>
691             and C<parameters>
692              
693             =item new_symbol
694              
695             $node= $parser->new_symbol($symbol_name);
696              
697             A reference to a symbolic value (i.e. variable or constant).
698             It has one attribute C<symbol_name>.
699              
700             =item new_string
701              
702             $node= $parser->new_string($string_value);
703              
704             A string literal. It has an attribute C<string_value> holding the raw value.
705              
706             =item new_number
707              
708             $node= $parser->new_number($value);
709              
710             A numeric constant. It has an attribute C<number_value> holding the raw value.
711              
712             =item get_negative
713              
714             $negative_node= $parser->get_negative( $node );
715              
716             Utility method to get the "opposite of" a parse node. By default, this wraps it with the
717             function C<'negative'>, unless the node is already a call to that function, in which case it unwraps the parameter.
718             It performs simple negation on numbers.
719              
720             =back
721              
722             =head1 AUTHOR
723              
724             Michael Conrad <mconrad@intellitree.com>
725              
726             =head1 COPYRIGHT AND LICENSE
727              
728             This software is copyright (c) 2021 by Michael Conrad, IntelliTree Solutions llc.
729              
730             This is free software; you can redistribute it and/or modify it under
731             the same terms as the Perl 5 programming language system itself.
732              
733             =cut