File Coverage

blib/lib/MarpaX/Languages/ECMAScript/AST/Grammar/ECMAScript_262_5/Pattern/Semantics.pm

Criterion	Covered	Total	%
statement	27	481	5.6
branch	0	124	0.0
condition	0	57	0.0
subroutine	9	95	9.4
pod	2	2	100.0
total	38	759	5.0

line	stmt	bran	cond	sub	pod	time	code
1	1			1		5	use strict;
	1					1
	1					33
2	1			1		4	use warnings FATAL => 'all';
	1					1
	1					38
3
4							package MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Pattern::Semantics;
5	1			1		5	use MarpaX::Languages::ECMAScript::AST::Exceptions qw/:all/;
	1					1
	1					175
6	1			1		9	use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses;
	1					1
	1					49
7	1			1		872	use List::Compare::Functional 0.21 qw/get_union_ref/;
	1					9059
	1					109
8	1			1		1647	use Unicode::Normalize qw/NFD NFC/;
	1					2838
	1					134
9	1			1		770	use Import::Into;
	1					3214
	1					78
10
11							#
12							# Credit goes to utf8::all
13							#
14							if ($^V >= v5.11.0) {
15							'feature'->import::into(__PACKAGE__, qw/unicode_strings/);
16							}
17
18							use constant {
19	1					159	ASSERTION_IS_NOT_MATCHER => 0,
20							ASSERTION_IS_MATCHER => 1
21	1			1		12	};
	1					1
22							use constant {
23	1					6639	ORD_a => ord('a'),
24							ORD_z => ord('z'),
25							ORD_A => ord('A'),
26							ORD_Z => ord('Z'),
27							ORD_0 => ord('0'),
28							ORD_9 => ord('9'),
29							ORD__ => ord('_'),
30	1			1		7	};
	1					2
31
32							# ABSTRACT: ECMAScript 262, Edition 5, pattern grammar default semantics package
33
34							our $VERSION = '0.019'; # VERSION
35
36
37							sub new {
38	0			0	1		my ($class) = @_;
39	0						my $self = {_lparen => $MarpaX::Languages::ECMAScript::AST::Grammar::Pattern::lparen};
40	0						bless $self, $class;
41							}
42
43
44							sub lparen {
45	0			0	1		my ($self) = @_;
46
47	0						return $self->{_lparen};
48							}
49
50							#
51							# IMPORTANT NOTE: These actions DELIBIRATELY do not use any perl regular expression. This is the prove that one can
52							# write a fresh regular expression engine from scratch. The only important notion is case-folding. There we rely
53							# on perl.
54							#
55							our @LINETERMINATOR = @{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::LineTerminator()};
56							our %HASHLINETERMINATOR = map {$_ => 1} @LINETERMINATOR;
57							our @WHITESPACE = @{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::WhiteSpace()};
58
59							sub _Pattern_Disjunction {
60	0			0			my ($self, $disjunction) = @_;
61
62	0						my $m = $disjunction;
63
64							return sub {
65							#
66							# Note: $str is a true perl string, $index is a true perl scalar
67							#
68	0			0			my ($str, $index, $multiline, $ignoreCase, $upperCase) = @_;
69	0		0				$multiline //= 0;
70	0		0				$ignoreCase //= 0;
71							$upperCase //= sub {
72	0	0					if ($^V >= v5.11.0) {
73							#
74							# C.f. http://www.effectiveperlprogramming.com/2012/02/fold-cases-properly/
75							# Please note that we really want only the upper case version as per
76							# ECMAScript specification
77							#
78	0						return uc($_[0]);
79							} else {
80							#
81							# C.f. uc from Unicode::Tussle
82							#
83	0						return NFC(uc(NFD($_[0])));
84							}
85	0		0				};
86							#
87							# We localize input, input length, mutiline and ignoreCase
88							#
89	0						local $MarpaX::Languages::ECMAScript::AST::Pattern::input = $str;
90	0						local $MarpaX::Languages::ECMAScript::AST::Pattern::inputLength = length($str);
91	0						local $MarpaX::Languages::ECMAScript::AST::Pattern::multiline = $multiline;
92	0						local $MarpaX::Languages::ECMAScript::AST::Pattern::ignoreCase = $ignoreCase;
93	0						local $MarpaX::Languages::ECMAScript::AST::Pattern::upperCase = $upperCase;
94
95							my $c = sub {
96	0						my ($state) = @_;
97	0						return $state;
98	0						};
99	0						my $cap = [ (undef) x scalar(@{$self->lparen}) ];
	0
100	0						my $x = [ $index, $cap ];
101
102	0						return &$m($x, $c);
103	0						};
104							}
105
106							sub _Disjunction_Alternative {
107	0			0			my ($self, $alternative) = @_;
108	0						return $alternative;
109							}
110
111							sub _Disjunction_Alternative_OR_Disjunction {
112	0			0			my ($self, $alternative, undef, $disjunction) = @_;
113
114	0						my $m1 = $alternative;
115	0						my $m2 = $disjunction;
116
117							return sub {
118	0			0			my ($x, $c) = @_;
119	0						my $r = &$m1($x, $c);
120	0	0					if ($r) {
121	0						return $r;
122							}
123	0						return &$m2($x, $c);
124	0						};
125							}
126
127							sub _Alternative {
128	0			0			my ($self) = @_;
129
130							return sub {
131	0			0			my ($x, $c) = @_;
132	0						return &$c($x);
133	0						};
134							}
135
136							sub _Alternative_Alternative_Term {
137	0			0			my ($self, $alternative, $term) = @_;
138
139	0						my $m1 = $alternative;
140	0						my $m2 = $term;
141
142							return sub {
143	0			0			my ($x, $c) = @_;
144							my $d = sub {
145	0						my ($y) = @_;
146	0						return &$m2($y, $c);
147	0						};
148	0						return &$m1($x, $d);
149	0						};
150							}
151
152							sub _Term_Assertion {
153	0			0			my ($self, $assertion) = @_;
154
155	0						my ($isMatcher, $assertionCode) = @{$assertion};
	0
156
157							return sub {
158	0			0			my ($x, $c) = @_;
159
160	0	0					if (! $isMatcher) {
161	0						my $t = $assertionCode;
162	0						my $r = &$t($x, $c); # Take care! Typo in ECMAScript spec, $c is missing
163	0	0					if (! $r) {
164	0						return 0;
165							}
166	0						return &$c($x);
167							} else {
168	0						my $m = $assertionCode;
169	0						return &$m($x, $c);
170							}
171	0						};
172							}
173
174							sub _Term_Atom {
175	0			0			my ($self, $atom) = @_;
176
177	0						return $atom;
178							}
179
180							sub _repeatMatcher {
181	0			0			my ($m, $min, $max, $greedy, $x, $c, $parenIndex, $parenCount) = @_;
182
183	0	0	0				if (defined($max) && $max == 0) {
184	0						return &$c($x);
185							}
186							my $d = sub {
187	0			0			my ($y) = @_;
188	0	0	0				if ($min == 0 && $y->[0] == $x->[0]) {
189	0						return 0;
190							}
191	0	0					my $min2 = ($min == 0) ? 0 : ($min - 1);
192	0	0					my $max2 = (! defined($max)) ? undef : ($max - 1);
193	0						return _repeatMatcher($m, $min2, $max2, $greedy, $y, $c, $parenIndex, $parenCount);
194	0						};
195	0						my @cap = @{$x->[1]};
	0
196	0						foreach my $k (($parenIndex+1)..($parenIndex+$parenCount)) {
197	0						$cap[$k-1] = undef; # Take care, cap index in ECMA spec start at 1
198							}
199	0						my $e = $x->[0];
200	0						my $xr = [$e, \@cap ];
201	0	0					if ($min != 0) {
202	0						return &$m($xr, $d);
203							}
204	0	0					if (! $greedy) {
205	0						my $z = &$c($x);
206	0	0					if ($z) {
207	0						return $z;
208							}
209	0						return &$m($xr, $d);
210							}
211	0						my $z = &$m($xr, $d);
212	0	0					if ($z) {
213	0						return $z;
214							}
215	0						return &$c($x);
216							}
217
218							sub _parenIndexAndCount {
219	0			0			my ($self) = @_;
220
221	0						my ($start, $end) = Marpa::R2::Context::location();
222	0						my $parenIndex = 0;
223	0						my $parenCount = 0;
224	0						foreach (@{$self->lparen}) {
	0
225	0	0					if ($_ < $start) {
		0
226	0						++$parenIndex;
227							}
228							elsif ($_ <= $end) {
229	0						++$parenCount;
230							}
231							}
232	0						return {parenIndex => $parenIndex, parenCount => $parenCount};
233							}
234
235							#
236							# Note: we will use undef for $max when its value is infinite
237							#
238							sub _Term_Atom_Quantifier {
239	0			0			my ($self, $atom, $quantifier) = @_;
240
241	0						my $m = $atom;
242	0						my ($min, $max, $greedy) = @{$quantifier};
	0
243	0	0	0				if (defined($max) && $max < $min) {
244	0						SyntaxError("Bad quantifier {$min,$max} in regular expression");
245							}
246	0						my $hashp = $self->_parenIndexAndCount();
247
248							return sub {
249	0			0			my ($x, $c) = @_;
250
251	0						return _repeatMatcher($m, $min, $max, $greedy, $x, $c, $hashp->{parenIndex}, $hashp->{parenCount});
252	0						};
253							}
254
255							sub _Assertion_Caret {
256	0			0			my ($self, $caret) = @_;
257
258							return [ASSERTION_IS_NOT_MATCHER,
259							sub {
260	0			0			my ($x) = @_;
261
262
263	0						my $e = $x->[0];
264	0	0					if ($e == 0) {
265	0						return 1;
266							}
267	0	0					if (! $MarpaX::Languages::ECMAScript::AST::Pattern::multiline) {
268	0						return 0;
269							}
270	0						my $c = substr($MarpaX::Languages::ECMAScript::AST::Pattern::input, $e-1, 1);
271	0	0					if (exists($HASHLINETERMINATOR{$c})) {
272	0						return 1;
273							}
274
275	0						return 0;
276	0						}],
277							}
278
279							sub _Assertion_Dollar {
280	0			0			my ($self, $caret) = @_;
281
282							return [ASSERTION_IS_NOT_MATCHER,
283							sub {
284	0			0			my ($x) = @_;
285
286	0						my $e = $x->[0];
287	0	0					if ($e == $MarpaX::Languages::ECMAScript::AST::Pattern::inputLength) {
288	0						return 1;
289							}
290	0	0					if (! $MarpaX::Languages::ECMAScript::AST::Pattern::multiline) {
291	0						return 0;
292							}
293	0						my $c = substr($MarpaX::Languages::ECMAScript::AST::Pattern::input, $e, 1);
294	0	0					if (exists($HASHLINETERMINATOR{$c})) {
295	0						return 1;
296							}
297
298	0						return 0;
299	0						}];
300							}
301
302							sub _isWordChar {
303	0			0			my ($e) = @_;
304
305	0	0	0				if ($e == -1 \|\| $e == $MarpaX::Languages::ECMAScript::AST::Pattern::inputLength) {
306	0						return 0;
307							}
308							#
309							# This really refers to ASCII characters, so it is ok to test the ord directly
310							#
311	0						my $c = ord(substr($MarpaX::Languages::ECMAScript::AST::Pattern::input, $e, 1));
312							#
313							# I put the most probables (corresponding also to the biggest ranges) first
314	0	0	0				if (
			0
			0
			0
			0
			0
315							($c >= ORD_a && $c <= ORD_z)
316							\|\|
317							($c >= ORD_A && $c <= ORD_Z)
318							\|\|
319							($c >= ORD_0 && $c <= ORD_9)
320							\|\|
321							($c == ORD__)
322							) {
323	0						return 1;
324							}
325
326	0						return 0;
327							}
328
329							sub _Assertion_b {
330	0			0			my ($self, $caret) = @_;
331
332							return [ASSERTION_IS_NOT_MATCHER,
333							sub {
334	0			0			my ($x) = @_;
335
336	0						my $e = $x->[0];
337	0						my $a = _isWordChar($e-1);
338	0						my $b = _isWordChar($e);
339	0	0	0				if ($a && ! $b) {
340	0						return 1;
341							}
342	0	0	0				if (! $a && $b) {
343	0						return 1;
344							}
345	0						return 0;
346	0						}];
347							}
348
349							sub _Assertion_B {
350	0			0			my ($self, $caret) = @_;
351
352							return [ASSERTION_IS_NOT_MATCHER,
353							sub {
354	0			0			my ($x) = @_;
355
356	0						my $e = $x->[0];
357	0						my $a = _isWordChar($e-1);
358	0						my $b = _isWordChar($e);
359	0	0	0				if ($a && ! $b) {
360	0						return 0;
361							}
362	0	0	0				if (! $a && $b) {
363	0						return 0;
364							}
365	0						return 1;
366	0						}];
367							}
368
369							sub _Assertion_DisjunctionPositiveLookAhead {
370	0			0			my ($self, undef, $disjunction, undef) = @_;
371
372	0						my $m = $disjunction;
373
374							return [ASSERTION_IS_MATCHER,
375							sub {
376	0			0			my ($x, $c) = @_;
377
378							my $d = sub {
379	0						my ($y) = @_;
380	0						return $y;
381	0						};
382
383	0						my $r = &$m($x, $d);
384	0	0					if (! $r) {
385	0						return 0;
386							}
387	0						my $y = $r;
388	0						my $cap = $y->[1];
389	0						my $xe = $x->[0];
390	0						my $z = [$xe, $cap];
391	0						return &$c($z);
392	0						}];
393							}
394
395							sub _Assertion_DisjunctionNegativeLookAhead {
396	0			0			my ($self, undef, $disjunction, undef) = @_;
397
398	0						my $m = $disjunction;
399
400							return [ASSERTION_IS_MATCHER,
401							sub {
402	0			0			my ($x, $c) = @_;
403
404							my $d = sub {
405	0						my ($y) = @_;
406	0						return $y;
407	0						};
408
409	0						my $r = &$m($x, $d);
410	0	0					if ($r) {
411	0						return 0;
412							}
413	0						return &$c($x);
414	0						}];
415							}
416
417							sub _Quantifier_QuantifierPrefix {
418	0			0			my ($self, $quantifierPrefix) = @_;
419
420	0						my ($min, $max) = @{$quantifierPrefix};
	0
421	0						return [$min, $max, 1];
422							}
423
424							sub _Quantifier_QuantifierPrefix_QuestionMark {
425	0			0			my ($self, $quantifierPrefix, $questionMark) = @_;
426
427	0						my ($min, $max) = @{$quantifierPrefix};
	0
428	0						return [$min, $max, 0];
429							}
430
431							sub _QuantifierPrefix_Star {
432	0			0			my ($self, $start) = @_;
433
434	0						return [0, undef];
435							}
436
437							sub _QuantifierPrefix_Plus {
438	0			0			my ($self, $plus) = @_;
439
440	0						return [1, undef];
441							}
442
443							sub _QuantifierPrefix_QuestionMark {
444	0			0			my ($self, $questionMark) = @_;
445
446	0						return [0, 1];
447							}
448
449							sub _QuantifierPrefix_DecimalDigits {
450	0			0			my ($self, undef, $decimalDigits, undef) = @_;
451
452	0						return [$decimalDigits, $decimalDigits];
453							}
454
455							sub _QuantifierPrefix_DecimalDigits_Comma {
456	0			0			my ($self, undef, $decimalDigits, undef) = @_;
457
458	0						return [$decimalDigits, undef];
459							}
460
461							sub _QuantifierPrefix_DecimalDigits_DecimalDigits {
462	0			0			my ($self, undef, $decimalDigits1, undef, $decimalDigits2, undef) = @_;
463
464	0						return [$decimalDigits1, $decimalDigits2];
465							}
466
467							sub _canonicalize {
468	0			0			my ($ch) = @_;
469
470	0	0					if (! $MarpaX::Languages::ECMAScript::AST::Pattern::ignoreCase) {
471	0						return $ch;
472							}
473
474	0						my $u = &$MarpaX::Languages::ECMAScript::AST::Pattern::upperCase($ch);
475	0	0					if (length($u) != 1) {
476							#
477							# I don't know why it has been designed like that -;
478							#
479	0						return $ch;
480							}
481	0						my $cu = $u;
482	0	0	0				if (ord($ch) >= 128 && ord($cu) < 128) {
483	0						return $ch;
484							}
485	0						return $cu;
486							}
487
488							#
489							# Note: we extend a little the notion of range to:
490							# * range including characters from ... to ...
491							# and
492							# * range NOT including characters from ... to ...
493							#
494							# i.e. a character set is [ negation flag, [range] ]
495							#
496							# This is different from the invert flag. For example:
497							# [^\d] means: $A=[0,[0..9]], $invert=1
498							# [^\D] means: $A=[1,[0..9]], $invert=1, which is equivalent to [\d], i.e.: $A=[0,[0..9], $invert=0
499
500							sub _characterSetMatcher {
501	0			0			my ($self, $A, $invert) = @_;
502
503	0						my ($x, $c) = @_;
504
505	0						my ($Anegation, $Arange) = @{$A};
	0
506
507	0	0					if ($Anegation) {
508	0						$invert = ! $invert;
509							}
510
511							return sub {
512	0			0			my ($x, $c) = @_;
513
514	0						my $e = $x->[0];
515	0	0					if ($e == $MarpaX::Languages::ECMAScript::AST::Pattern::inputLength) {
516	0						return 0;
517							}
518	0						my $ch = substr($MarpaX::Languages::ECMAScript::AST::Pattern::input, $e, 1);
519	0						my $cc = _canonicalize($ch);
520	0	0					if (! $invert) {
521	0	0					if (! grep {$cc eq _canonicalize($_)} @{$Arange}) {
	0
	0
522	0						return 0;
523							}
524							} else {
525	0	0					if (grep {$cc eq _canonicalize($_)} @{$Arange}) {
	0
	0
526	0						return 0;
527							}
528							}
529	0						my $cap = $x->[1];
530	0						my $y = [$e+1, $cap];
531	0						return &$c($y);
532	0						};
533							}
534
535							sub _Atom_PatternCharacter {
536	0			0			my ($self, $patternCharacter) = @_;
537
538							#
539							# Note: PatternCharacter is a lexeme, default lexeme value is [start,length,value]
540							#
541	0						my $ch = $patternCharacter->[2];
542	0						my $A = [0 , [ $ch ]];
543	0						return $self->_characterSetMatcher($A, 0);
544							}
545
546							sub _Atom_Dot {
547	0			0			my ($self, $dot) = @_;
548
549	0						my $A = [1 , \@LINETERMINATOR];
550	0						return $self->_characterSetMatcher($A, 0);
551
552
553							}
554
555							sub _Atom_Backslash_AtomEscape {
556	0			0			my ($self, $backslash, $atomEscape) = @_;
557
558	0						return $atomEscape;
559							}
560
561							sub _Atom_Backslash_CharacterClass {
562	0			0			my ($self, $characterClass) = @_;
563
564	0						my ($A, $invert) = @{$characterClass};
	0
565	0						return $self->_characterSetMatcher($A, $invert);
566							}
567
568							sub _Atom_Lparen_Disjunction_Rparen {
569	0			0			my ($self, $lparen, $disjunction, $rparen) = @_;
570
571	0						my $m = $disjunction;
572	0						my $parenIndex = $self->_parenIndexAndCount()->{parenIndex};
573							return sub {
574	0			0			my ($x, $c) = @_;
575
576							my $d = sub {
577	0						my ($y) = @_;
578
579	0						my @cap = @{$y->[1]};
	0
580	0						my $xe = $x->[0];
581	0						my $ye = $y->[0];
582	0						my $s = substr($MarpaX::Languages::ECMAScript::AST::Pattern::input, $xe, $ye-$xe);
583	0						$cap[$parenIndex] = $s; # Take care, in ECMA spec, cap index start at 1
584	0						my $z = [$ye, \@cap ];
585	0						return &$c($z);
586	0						};
587
588	0						return &$m($x, $d);
589	0						};
590							}
591
592							sub _Atom_nonCapturingDisjunction {
593	0			0			my ($self, undef, $disjunction, undef) = @_;
594
595	0						return $disjunction;
596							}
597
598							sub _AtomEscape_DecimalEscape {
599	0			0			my ($self, $decimalEscape) = @_;
600
601	0						my $E = $decimalEscape;
602
603							#
604							# We are in an atom escape context: the only allowed character is NUL
605							#
606	0	0					my $ch = ($decimalEscape == 0) ? MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::NULL()->[0] : undef;
607
608	0	0					if (defined($ch)) {
609	0						my $A = [0 , [ $ch ]];
610	0						return $self->_characterSetMatcher($A, 0);
611							}
612	0						my $n = $E;
613	0	0	0				if ($n == 0 \|\| $n > scalar(@{$self->lparen})) {
	0
614	0						SyntaxError("backtrack number must be <= " . scalar(@{$self->lparen}));
	0
615							}
616							return sub {
617	0			0			my ($x, $c) = @_;
618
619	0						my $cap = $x->[1];
620	0						my $s = $cap->[$n-1]; # Take care, in ECMA spec cap index start at 1
621	0	0					if (! defined($s)) {
622	0						return &$c($x);
623							}
624	0						my $e = $x->[0];
625	0						my $len = length($s);
626	0						my $f = $e+$len;
627	0	0					if ($f > $MarpaX::Languages::ECMAScript::AST::Pattern::inputLength) {
628	0						return 0;
629							}
630	0						foreach (0..($len-1)) {
631	0	0					if (_canonicalize(substr($s, $_, 1)) ne _canonicalize(substr($MarpaX::Languages::ECMAScript::AST::Pattern::input, $e+$_, 1))) {
632	0						return 0;
633							}
634							}
635	0						my $y = [$f, $cap];
636	0						return &$c($y);
637	0						};
638							}
639
640							sub _AtomEscape_CharacterEscape {
641	0			0			my ($self, $characterEscape) = @_;
642
643	0						my $ch = $characterEscape;
644	0						my $A = [0 , [ $ch ]];
645	0						return $self->_characterSetMatcher($A, 0);
646							}
647
648							sub _AtomEscape_CharacterClassEscape {
649	0			0			my ($self, $characterClassEscape) = @_;
650
651	0						return $self->_characterSetMatcher($characterClassEscape, 0);
652							}
653
654							sub _CharacterEscape_ControlEscape {
655	0			0			my ($self, $controlEscape) = @_;
656
657							#
658							# Note: ControlEscape is a lexeme, default lexeme value is [start,length,value]
659							#
660	0	0					if ($controlEscape->[2] eq 't') {
		0
		0
		0
		0
661	0						return MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::TAB()->[0];
662							}
663							elsif ($controlEscape->[2] eq 'n') {
664	0						return MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::LF()->[0];
665							}
666							elsif ($controlEscape->[2] eq 'v') {
667	0						return MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::VT()->[0];
668							}
669							elsif ($controlEscape->[2] eq 'f') {
670	0						return MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::FF()->[0];
671							}
672							elsif ($controlEscape->[2] eq 'r') {
673	0						return MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::CR()->[0];
674							}
675							}
676
677							sub _CharacterEscape_ControlLetter {
678	0			0			my ($self, undef, $controlLetter) = @_;
679
680							#
681							# Note: ControlEscape is a lexeme, default lexeme value is [start,length,value]
682							#
683	0						my $ch = $controlLetter->[2];
684	0						my $i = ord($ch);
685	0						my $j = $i % 32;
686	0						return chr($j);
687							}
688
689							#
690							# Note: _HexDigit is a lexeme, default lexeme value is [start,length,value]
691							#
692	0			0			sub _HexEscapeSequence { return chr(16 * hex($_[2]->[2]) + hex($_[3]->[2])); }
693	0			0			sub _UnicodeEscapeSequence { return chr(4096 * hex($_[2]->[2]) + 256 * hex($_[3]->[2]) + 16 * hex($_[4]->[2]) + hex($_[5]->[2])); }
694
695							sub _CharacterEscape_HexEscapeSequence {
696	0			0			my ($self, $hexEscapeSequence) = @_;
697
698	0						return $hexEscapeSequence;
699							}
700
701							sub _CharacterEscape_UnicodeEscapeSequence {
702	0			0			my ($self, $unicodeEscapeSequence) = @_;
703
704	0						return $unicodeEscapeSequence;
705							}
706
707							sub _CharacterEscape_IdentityEscape {
708	0			0			my ($self, $identityEscape) = @_;
709							#
710							# Note: IdentityEscape is a lexeme, default lexeme value is [start,length,value]
711							#
712	0						return $identityEscape->[2];
713							}
714
715							sub _DecimalEscape_DecimalIntegerLiteral {
716	0			0			my ($self, $decimalIntegerLiteral) = @_;
717
718							#
719							# Note: DecimalIntegerLiteral is already an integer
720							#
721	0						my $i = $decimalIntegerLiteral;
722
723	0						return $i;
724							}
725
726							sub _DecimalIntegerLiteral {
727	0			0			my ($self, $decimalIntegerLiteral) = @_;
728
729							#
730							# Note: decimalIntegerLiteral is a lexeme, default lexeme value is [start,length,value]
731							#
732	0						return int($decimalIntegerLiteral->[2]);
733							}
734
735							sub _DecimalDigits {
736	0			0			my ($self, $decimalDigits) = @_;
737
738							#
739							# Note: decimalDigits is a lexeme, default lexeme value is [start,length,value]
740							#
741	0						return int($decimalDigits->[2]);
742							}
743
744							sub _CharacterClassEscape {
745	0			0			my ($self, $cCharacterClassEscape) = @_;
746
747	0	0					if ($cCharacterClassEscape eq 'd') {
		0
		0
		0
		0
		0
748	0						return [0 , [ '0'..'9' ]];
749							}
750							elsif ($cCharacterClassEscape eq 'D') {
751	0						return [1 , [ '0'..'9' ]];
752							}
753							elsif ($cCharacterClassEscape eq 's') {
754	0						return [0 , [ @WHITESPACE, @LINETERMINATOR ]];
755							}
756							elsif ($cCharacterClassEscape eq 'S') {
757	0						return [1 , [ @WHITESPACE, @LINETERMINATOR ]];
758							}
759							elsif ($cCharacterClassEscape eq 'w') {
760	0						return [0 , [ 'a'..'z', 'A'..'Z', '0'..'9', '_' ]];
761							}
762							elsif ($cCharacterClassEscape eq 'W') {
763	0						return [1 , [ 'a'..'z', 'A'..'Z', '0'..'9', '_' ]];
764							}
765
766							}
767
768							sub _CharacterClass_ClassRanges {
769	0			0			my ($self, undef, $classRanges, undef) = @_;
770
771	0						return [$classRanges, 0];
772							}
773
774							sub _CharacterClass_CaretClassRanges {
775	0			0			my ($self, undef, $classRanges, undef) = @_;
776
777	0						return [$classRanges, 1];
778							}
779
780							sub _ClassRanges {
781	0			0			my ($self) = @_;
782
783	0						return [0, []];
784							}
785
786							sub _ClassRanges_NonemptyClassRanges {
787	0			0			my ($self, $nonemptyClassRanges) = @_;
788
789	0						return $nonemptyClassRanges;
790							}
791
792							sub _NonemptyClassRanges_ClassAtom {
793	0			0			my ($self, $classAtom) = @_;
794
795	0						return $classAtom;
796							}
797
798							sub _rangeComplement {
799	0			0			my ($self, $A) = @_;
800
801	0						my ($Anegation, $Arange) = @{$A};
	0
802
803	0						my %hash = map {$_ => 1} @{$Arange};
	0
	0
804
805	0	0					return [ $Anegation ? 0 : 1, [ grep {! exists($hash{$_})} (1.65535) ] ];
	0
806
807							}
808
809							sub _charsetUnion {
810	0			0			my ($self, $A, $B) = @_;
811
812	0						my ($Anegation, $Arange) = @{$A};
	0
813	0						my ($Bnegation, $Brange) = @{$B};
	0
814
815	0	0					if ($Anegation == $Bnegation) {
816							#
817							# If A and B have the same negation, then this really is a normal union
818							#
819	0						return [ $Anegation, get_union_ref('--unsorted', [ $Arange, $Brange ]) ];
820							} else {
821							#
822							# If not A and B have the same negation, then this really is a normal union.
823							# We choose the one with the smallest number of elements
824							#
825	0						my $Aelements = $#{$A->[1]};
	0
826	0						my $Belements = $#{$B->[1]};
	0
827							#
828							# 65534 because this is the maximum index in JavaScript, limited explicitely to UCS-2
829							#
830	0						my $AelementsRevert = 65534 - $#{$A->[1]};
	0
831	0						my $BelementsRevert = 65534 - $#{$B->[1]};
	0
832
833	0	0					if (($Aelements + $BelementsRevert) <= ($AelementsRevert + $Belements)) {
834							#
835							# We take the union of A and reverted B
836							#
837	0						return $self->_charsetUnion($A, $self->_rangeComplement($B));
838							} else {
839							#
840							# We take the union of reverted A and B
841							#
842	0						return $self->_charsetUnion($self->_rangeComplement($A), $B);
843							}
844							}
845							}
846
847							sub _NonemptyClassRanges_ClassAtom_NonemptyClassRangesNoDash {
848	0			0			my ($self, $classAtom, $nonemptyClassRangesNoDash) = @_;
849
850	0						my $A = $classAtom;
851	0						my $B = $nonemptyClassRangesNoDash;
852	0						return $self->_charsetUnion($A, $B);
853							}
854
855							sub _characterRange {
856	0			0			my ($self, $A, $B) = @_;
857
858	0						my ($Anegation, $Arange) = @{$A};
	0
859	0						my ($Bnegation, $Brange) = @{$B};
	0
860
861	0	0					if ($Anegation != $Bnegation) {
862							# We choose the one with the smallest number of elements
863							#
864	0						my $Aelements = $#{$A->[1]};
	0
865	0						my $Belements = $#{$B->[1]};
	0
866							#
867							# 65534 because this is the maximum index in JavaScript, limited explicitely to UCS-2
868							#
869	0						my $AelementsRevert = 65534 - $#{$A->[1]};
	0
870	0						my $BelementsRevert = 65534 - $#{$B->[1]};
	0
871
872	0	0					if ($AelementsRevert <= $BelementsRevert) {
873							#
874							# We take the reverted A
875							#
876	0						($Anegation, $Arange) = $self->_rangeComplement($A);
877							} else {
878							#
879							# We take the reverted B
880							#
881	0						($Bnegation, $Brange) = $self->_rangeComplement($B);
882							}
883							}
884
885	0	0	0				if ($#{$Arange} != 0 \|\| $#{$Brange} != 0) {
	0
	0
886	0						SyntaxError("Doing characterRange requires both charsets to have exactly one element");
887							}
888	0						my $a = $Arange->[0];
889	0						my $b = $Brange->[0];
890	0						my $i = ord($a);
891	0						my $j = ord($b);
892	0	0					if ($i > $j) {
893	0						SyntaxError("Doing characterRange requires first char '$a' to be <= second char '$b'");
894							}
895
896	0						return [$Anegation, [ map {chr($_)} ($i..$j) ]];
	0
897
898							}
899
900							sub _NonemptyClassRanges_ClassAtom_ClassAtom_ClassRanges {
901	0			0			my ($self, $classAtom1, undef, $classAtom2, $classRanges) = @_;
902
903	0						my $A = $classAtom1;
904	0						my $B = $classAtom2;
905	0						my $C = $classRanges;
906	0						my $D = $self->_characterRange($A, $B);
907
908	0						return $self->_charsetUnion($D, $C);
909							}
910
911							sub _NonemptyClassRangesNoDash_ClassAtom {
912	0			0			my ($self, $classAtom) = @_;
913
914	0						return $classAtom;
915							}
916
917							sub _NonemptyClassRangesNoDash_ClassAtomNoDash_NonemptyClassRangesNoDash {
918	0			0			my ($self, $classAtomNoDash, $nonemptyClassRangesNoDash) = @_;
919
920	0						my $A = $classAtomNoDash;
921	0						my $B = $nonemptyClassRangesNoDash;
922	0						return $self->_charsetUnion($A, $B);
923							}
924
925							sub _NonemptyClassRangesNoDash_ClassAtomNoDash_ClassAtom_ClassRanges {
926	0			0			my ($self, $classAtomNoDash, undef, $classAtom, $classRanges) = @_;
927
928	0						my $A = $classAtomNoDash;
929	0						my $B = $classAtom;
930	0						my $C = $classRanges;
931	0						my $D = $self->_characterRange($A, $B);
932	0						return $self->_charsetUnion($D, $C);
933							}
934
935							sub _ClassAtom_Dash {
936	0			0			my ($self, undef) = @_;
937
938	0						return [0, [ '-' ]];
939							}
940
941							sub _ClassAtom_ClassAtomNoDash {
942	0			0			my ($self, $classAtomNoDash) = @_;
943
944	0						return $classAtomNoDash;
945							}
946
947							sub _ClassAtomNoDash_OneChar {
948	0			0			my ($self, $oneChar) = @_;
949
950							#
951							# Note: OneChar is a lexeme, default lexeme value is [start,length,value]
952							#
953	0						return [0, [ $oneChar->[2] ]];
954							}
955
956							sub _ClassAtomNoDash_ClassEscape {
957	0			0			my ($self, undef, $classEscape) = @_;
958
959	0						return $classEscape;
960							}
961
962							sub _ClassEscape_DecimalEscape {
963	0			0			my ($self, $decimalEscape) = @_;
964
965	0						my $E = $decimalEscape;
966
967							#
968							# We are in the ClassEscape context. Only a character is possible.
969							# Yet, it is possible that the codepoint $E correspond to no character
970							#
971	0						my $ch = eval {chr($E)};
	0
972	0	0					if ($@) {
973	0						SyntaxError("Decimal Escape is not a valid character");
974							}
975	0						return [0, [ $ch ]];
976							}
977
978							sub _ClassEscape_b {
979	0			0			my ($self, undef) = @_;
980
981	0						return [0, MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::BS() ];
982							}
983
984							sub _ClassEscape_CharacterEscape {
985	0			0			my ($self, $characterEscape) = @_;
986
987	0						return [0, [ $characterEscape ]];
988							}
989
990							sub _ClassEscape_CharacterClassEscape {
991	0			0			my ($self, $characterClassEscape) = @_;
992
993	0						return $characterClassEscape;
994							}
995
996
997							1;
998
999							__END__