File Coverage

blib/lib/MarpaX/Languages/ECMAScript/AST/Grammar/ECMAScript_262_5/Program.pm

Criterion	Covered	Total	%
statement	30	171	17.5
branch	0	60	0.0
condition	0	42	0.0
subroutine	10	25	40.0
pod	4	4	100.0
total	44	302	14.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# Here is the exhaustive list of difficulties with the ECMAScript grammar:
3							#
4							# * The list of reserved keywords is context sensitive (depend on strict mode)
5							# * The source CAN be not ok v.s. semi-colon: automatic semi-colon insertion will then happen
6							# * Allowed separators is contextual (sometimes no line terminator is allowed)
7							# * RegularExpressionLiteral ambiguity with AssignmentExpression or MultiplicativeExpression
8							#
9	1			1		7	use strict;
	1					1
	1					42
10	1			1		5	use warnings FATAL => 'all';
	1					2
	1					59
11
12							package MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Program;
13	1			1		699	use parent qw/MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Base/;
	1					335
	1					4
14	1			1		953	use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Program::Semantics;
	1					4
	1					65
15	1			1		791	use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Lexical::RegularExpressionLiteral;
	1					4
	1					40
16	1			1		7	use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Lexical::StringLiteral;
	1					2
	1					27
17	1			1		579	use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Lexical::NumericLiteral;
	1					3
	1					30
18	1			1		6	use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses;
	1					2
	1					27
19	1			1		7	use MarpaX::Languages::ECMAScript::AST::Exceptions qw/:all/;
	1					2
	1					178
20							#use Log::Any qw/$log/;
21	1			1		6	use SUPER;
	1					2
	1					76
22
23							# ABSTRACT: ECMAScript-262, Edition 5, lexical program grammar written in Marpa BNF
24
25							our $VERSION = '0.018'; # VERSION
26
27							our $WhiteSpace = qr/(?:[\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsWhiteSpace}])/;
28							our $LineTerminator = qr/(?:[\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsLineTerminator}])/;
29							our $SingleLineComment = qr/(?:\/\/[\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsSourceCharacterButNotLineTerminator}]*)/;
30							our $MultiLineComment = qr/(?:(?:\/\)(?:(?:[\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsSourceCharacterButNotStar}]+\|\(?![\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsSourceCharacterButNotSlash}])))(?:\\/))/;
31							our $UnicodeLetter = qr/[\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsUnicodeLetter}]/;
32							our $HexDigit4 = qr/[\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsHexDigit}]{4}/;
33
34							our $S = qr/(?:$WhiteSpace\|$LineTerminator\|$SingleLineComment\|$MultiLineComment)/;
35							our $isPostLineTerminatorLength = qr/\G$S+/;
36							our $isPreSLength = qr/\G$S+/;
37							our $isRcurly = qr/\G$S*\}/;
38							our $isEnd = qr/\G$S*$/;
39							our $isDecimalDigit = qr/\G[\p{MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::CharacterClasses::IsDecimalDigit}]/;
40							our $isIdentifierStart = qr/\G(?:$UnicodeLetter\|\$\|_\|\\u$HexDigit4)/;
41							our @Keyword =
42							qw/break do instanceof typeof
43							case else new var
44							catch finally return void
45							continue for switch while
46							debugger function this with
47							default if throw
48							delete in try/;
49
50							our @FutureReservedWord =
51							qw/class enum extends super
52							const export import/;
53
54							our @FutureReservedWordStrict =
55							qw/implements let private public yield
56							interface package protected static/;
57
58							our @NullLiteral = qw/null/;
59
60							our @BooleanLiteral = qw/true false/;
61
62							#
63							# Force eventual higher priority
64							#
65							our %PRIORITY = (FUNCTION => 2);
66
67							our $grammar_content = do {local $/; };
68
69							#
70							# It is clearer to have reserved words in an array. But for efficienvy the hash is better,
71							# so I create on-the-fly associated hashes using the arrays. Convention is: the lexeme name of
72							# a reserved word is this word, but in capital letters (since none of them is in capital letter)
73							#
74							our %Keyword = map {($_, uc($_))} @Keyword;
75							our %FutureReservedWord = map {($_, uc($_))} @FutureReservedWord;
76							our %FutureReservedWordStrict = map {($_, uc($_))} @FutureReservedWordStrict;
77							our %NullLiteral = map {($_, uc($_))} @NullLiteral;
78							our %BooleanLiteral = map {($_, uc($_))} @BooleanLiteral;
79
80							#
81							# ... And we inject in the grammar those that exist (FutureReservedWord do not)
82							#
83							$grammar_content .= "\n";
84							# ... Priorities
85							map {$grammar_content .= ":lexeme ~ <$_> priority => " . ($PRIORITY{$_} \|\| 1) . "\n"} values %Keyword;
86							map {$grammar_content .= ":lexeme ~ <$_> priority => " . ($PRIORITY{$_} \|\| 1) . "\n"} values %NullLiteral;
87							map {$grammar_content .= ":lexeme ~ <$_> priority => " . ($PRIORITY{$_} \|\| 1) . "\n"} values %BooleanLiteral;
88							# ... Definition
89							map {$grammar_content .= uc($_) . " ~ '$_'\n"} @Keyword;
90							map {$grammar_content .= uc($_) . " ~ '$_'\n"} @NullLiteral;
91							map {$grammar_content .= uc($_) . " ~ '$_'\n"} @BooleanLiteral;
92							#
93							# Injection of grammars.
94							#
95							our $StringLiteral = MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Lexical::StringLiteral->new();
96							our $RegularExpressionLiteral = MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Lexical::RegularExpressionLiteral->new();
97							our $NumericLiteral = MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Lexical::NumericLiteral->new();
98							$grammar_content .= $StringLiteral->extract;
99							$grammar_content .= $NumericLiteral->extract;
100							$grammar_content .= $RegularExpressionLiteral->extract;
101
102							#
103							# For convenience in the IDENTIFIER$ lexeme callback, we merge Keyword, FutureReservedWord, NullLiteral, BooleanLiteral into
104							# a single hash ReservedWord.
105							#
106							our %ReservedWord = map {$_ => 1} (keys %Keyword, keys %FutureReservedWord, keys %NullLiteral, keys %BooleanLiteral);
107
108
109
110							sub make_grammar_content {
111	0			0	1		my ($class) = @_;
112	0						return $grammar_content;
113							}
114
115
116							sub make_semantics_package {
117	0			0	1		my ($class) = @_;
118	0						return join('::', $class, 'Semantics');
119							}
120
121
122							sub spacesAny {
123	0			0	1		my $self = shift;
124	0	0					if (@_) {
125	0						$self->{_spacesAny} = shift;
126							}
127	0						return $self->{_spacesAny};
128							}
129
130
131							sub parse {
132	0			0	1		my ($self, $source, $impl) = @_;
133							#
134							# Because of Automatic Semicolon Insertion that may happen at the end,
135							# a space is appended to a copy of the source to be parsed.
136							#
137	0						$self->{programCompleted} = 0;
138	0						$source .= ' ';
139	0						return $self->SUPER($source, $impl,
140							{
141							callback => \&_eventCallback,
142							callbackargs => [ $self ],
143							failure => \&_failureCallback,
144							failureargs => [ $self ],
145							end => \&_endCallback,
146							endargs => [ $self ],
147							});
148							}
149
150							sub _eventCallback {
151	0			0			my ($self, $source, $pos, $max, $impl) = @_;
152
153							#
154							# $pos is the exact position where SLIF stopped because of an event
155							#
156	0						my $rc = $pos;
157	0						my %lastLexeme = ();
158	0						my $lastLexemeDoneb = 0;
159
160							#
161							# Cache of some call results
162							#
163	0						$self->{_isIdentifierStart} = {};
164	0						$self->{_isDecimalDigit} = {};
165	0						$self->{_preSLength} = {};
166	0						$self->{_isEnd} = {};
167	0						$self->{_postLineTerminatorLength} = {};
168
169	0						foreach (@{$impl->events()}) {
	0
170	0						my ($name) = @{$_};
	0
171							#
172							# Events are always in this order:
173							#
174							# ---------------------------------
175							# 1. Completion events first (XXX$)
176							# ---------------------------------
177							#
178	0	0	0				if ($name eq 'Program$') {
		0
		0
		0
		0
		0
		0
179							#
180							# Program$ will happen rarely, so even it does cost it is ok to do so
181							#
182	0		0				$self->{programCompleted} = ($self->{_isEnd}->{$pos} //= $self->_isEnd($source, $pos, $impl));
183							}
184							elsif ($name eq 'NumericLiteral$') {
185							#
186							# The source character immediately following a NumericLiteral must not be
187							# an IdentifierStart or DecimalDigit
188							#
189	0	0	0				if (($self->{_isIdentifierStart}->{$pos} //= $self->_isIdentifierStart($source, $pos, $impl)) \|\|
			0
			0
190							($self->{_isDecimalDigit}->{$pos} //= $self->_isDecimalDigit($source, $pos, $impl))) {
191	0						my ($start, $end) = $impl->last_completed_range('NumericLiteral');
192	0						my $lastNumericLiteral = $impl->range_to_string($start, $end);
193	0						SyntaxError(error => "NumericLiteral $lastNumericLiteral must not be immediately followed by an IdentifierStart or DecimalDigit");
194							}
195							}
196							elsif ($name eq 'IDENTIFIER$') {
197	0	0					if (! $lastLexemeDoneb) {
198	0						$self->getLastLexeme(\%lastLexeme, $impl);
199	0						$lastLexemeDoneb = 1;
200							}
201	0	0					if (exists($ReservedWord{$lastLexeme{value}})) {
202	0						SyntaxError(error => "Identifier $lastLexeme{value} is a reserved word");
203							}
204							}
205							#
206							# ------------------------
207							# 2. nulled events (XXX[])
208							# ------------------------
209							#
210							# ------------------------------------
211							# 3. prediction events (^XXX or ^^XXX)
212							# ------------------------------------
213							#
214							elsif ($name eq '^INVISIBLE_SEMICOLON') {
215							#
216							# In the AST, we explicitely associate the ';' to the missing semicolon
217							#
218	0	0					if (! $lastLexemeDoneb) {
219	0						$self->getLastLexeme(\%lastLexeme, $impl);
220	0						$lastLexemeDoneb = 1;
221							}
222	0		0				my $Slength = ($self->{_preSLength}->{$rc} //= $self->_preSLength($source, $rc, $impl));
223	0						$self->_insertInvisibleSemiColon($impl, $rc, $Slength);
224	0						$rc += $Slength;
225							}
226							#
227							# ^PLUSPLUS_POSTFIX, ^MINUSMINUS_POSTFIX
228							# --------------------------------------
229							elsif ($name eq '^PLUSPLUS_POSTFIX' \|\| $name eq '^MINUSMINUS_POSTFIX') {
230	0	0					if (! $lastLexemeDoneb) {
231	0						$self->getLastLexeme(\%lastLexeme, $impl);
232	0						$lastLexemeDoneb = 1;
233							}
234	0						my $postLineTerminatorPos = $lastLexeme{start} + $lastLexeme{length};
235	0		0				my $postLineTerminatorLength = ($self->{_postLineTerminatorLength}->{$postLineTerminatorPos} //= $self->_postLineTerminatorLength($source, $postLineTerminatorPos, $impl));
236	0	0					if ($postLineTerminatorLength > 0) {
237	0						$impl->lexeme_read('SEMICOLON', $postLineTerminatorPos, $postLineTerminatorLength, ';');
238							}
239	0						my $lname = $name;
240	0						substr($lname, 0, 1, '');
241	0	0					my $lvalue = ($lname eq 'PLUSPLUS_POSTFIX') ? '++' : '--';
242	0						$impl->lexeme_read($lname, $rc, 2, $lvalue);
243	0						$rc += 2;
244							}
245							#
246							# ^^DIV (because of REGULAREXPRESSIONLITERAL that can eat it)
247							# -----------------------------------------------------------
248							elsif ($name eq '^^DIV') {
249	0		0				my $realpos = $rc + ($self->{_preSLength}->{$rc} //= $self->_preSLength($source, $rc, $impl));
250	0	0	0				if (index($source, '/', $realpos) == $realpos &&
			0
			0
251							index($source, '/=', $realpos) != $realpos &&
252							index($source, '//', $realpos) != $realpos &&
253							index($source, '/*', $realpos) != $realpos) {
254	0						$impl->lexeme_read('DIV', $realpos, 1, '/');
255	0						$rc = $realpos + 1;
256							}
257							}
258							#
259							# ^^DIVASSIGN (because of REGULAREXPRESSIONLITERAL that can eat it)
260							# ------------------------------------------------------------------
261							elsif ($name eq '^^DIVASSIGN') {
262	0		0				my $realpos = $rc + ($self->{_preSLength}->{$rc} //= $self->_preSLength($source, $rc, $impl));
263	0	0	0				if (index($source, '/=', $realpos) == $realpos &&
			0
264							index($source, '//', $realpos) != $realpos &&
265							index($source, '/*', $realpos) != $realpos) {
266	0						$impl->lexeme_read('DIVASSIGN', $realpos, 2, '/=');
267	0						$rc = $realpos + 2;
268							}
269							}
270							}
271
272							#
273							# Remove cache
274							#
275	0						delete($self->{_isIdentifierStart});
276	0						delete($self->{_isDecimalDigit});
277	0						delete($self->{_preSLength});
278	0						delete($self->{_isEnd});
279	0						delete($self->{_postLineTerminatorLength});
280
281							#if ($rc != $pos) {
282							# $log->tracef('[_eventCallback] Resuming at position %d (was %d when called)', $rc, $pos);
283							#}
284
285	0						return $rc;
286							}
287
288							sub _postLineTerminatorLength {
289							# my ($self, $source, $pos, $impl) = @_;
290
291	0			0			my $rc = 0;
292
293	0						my $prevpos = pos($_[1]);
294	0						pos($_[1]) = $_[2];
295
296							#
297							# Take care: the separator is: _WhiteSpace \| _LineTerminator \| _SingleLineComment \| _MultiLineComment
298							# where a _MultiLineComment that contains a _LineTerminator is considered equivalent to a _LineTerminator
299							#
300							# This is why if we find a separator just before $pos, we check again the presence of _LineTerminator in the match
301							#
302	0	0					if ($_[1] =~ $isPostLineTerminatorLength) {
303	0						my $length = $+[0] - $-[0];
304	0	0					if (substr($_[1], $-[0], $length) =~ /$LineTerminator/) {
305	0						$rc = $length;
306							}
307							}
308
309							#if ($rc > 0) {
310							# $log->tracef('[_postLineTerminatorLength] Found postLineTerminator of length %d', $rc);
311							#}
312
313	0						pos($_[1]) = $prevpos;
314
315	0						return $rc;
316							}
317
318							sub _preSLength {
319							# my ($self, $source, $pos, $impl) = @_;
320
321	0			0			my $rc = 0;
322
323	0						my $prevpos = pos($_[1]);
324	0						pos($_[1]) = $_[2];
325
326	0	0					if ($_[1] =~ $isPreSLength) {
327	0						my $length = $+[0] - $-[0];
328	0						$rc = $length;
329							}
330
331							#if ($rc > 0) {
332							# $log->tracef('[_preSLength] Found S of length %d', $rc);
333							#}
334
335	0						pos($_[1]) = $prevpos;
336
337	0						return $rc;
338							}
339
340							sub _isRcurly {
341							# my ($self, $source, $pos, $impl) = @_;
342
343	0			0			my $rc = 0;
344
345	0						my $prevpos = pos($_[1]);
346	0						pos($_[1]) = $_[2];
347
348	0	0					if ($_[1] =~ $isRcurly) {
349	0						$rc = 1;
350							}
351
352							#if ($rc) {
353							# $log->tracef('[_isRcurly] Found \'}\'');
354							#}
355
356	0						pos($_[1]) = $prevpos;
357
358	0						return $rc;
359							}
360
361							sub _isIdentifierStart {
362							# my ($self, $source, $pos, $impl) = @_;
363
364	0			0			my $rc = 0;
365
366	0						my $prevpos = pos($_[1]);
367	0						pos($_[1]) = $_[2];
368
369	0	0					if ($_[1] =~ $isIdentifierStart) {
370	0						$rc = 1;
371							}
372
373							#if ($rc) {
374							# $log->tracef('[_isIdentifierStart] Found \'%s\'', $&);
375							#}
376
377	0						pos($_[1]) = $prevpos;
378
379	0						return $rc;
380							}
381
382							sub _isDecimalDigit {
383							# my ($self, $source, $pos, $impl) = @_;
384
385	0			0			my $rc = 0;
386
387	0						my $prevpos = pos($_[1]);
388	0						pos($_[1]) = $_[2];
389
390	0	0					if ($_[1] =~ $isDecimalDigit) {
391	0						$rc = 1;
392							}
393
394							#if ($rc) {
395							# $log->tracef('[_isDecimalDigit] Found \'%s\'', $&);
396							#}
397
398	0						pos($_[1]) = $prevpos;
399
400	0						return $rc;
401							}
402
403							sub _isEnd {
404							# my ($self, $source, $pos, $impl) = @_;
405
406	0			0			my $grammar = $_[0]->spacesAny->{grammar};
407	0						my $impl = $_[0]->spacesAny->{impl};
408	0						$grammar->parse($_[1], $impl, $_[2]);
409	0						return $grammar->endReached;
410							}
411
412							sub _insertSemiColon {
413	0			0			my ($self, $impl, $pos, $length) = @_;
414
415	0	0					if (! $impl->lexeme_read('SEMICOLON', $pos, $length, ';')) {
416	0						SyntaxError(error => "Automatic Semicolon Insertion not allowed at position $pos");
417							}
418							}
419
420							sub _insertInvisibleSemiColon {
421	0			0			my ($self, $impl, $pos, $length) = @_;
422
423	0	0					if (! $impl->lexeme_read('INVISIBLE_SEMICOLON', $pos, $length, ';')) {
424	0						SyntaxError(error => "Automatic Invisible Semicolon Insertion not allowed at position $pos");
425							}
426							}
427
428							sub _failureCallback {
429	0			0			my ($self, $source, $pos, $max, $impl) = @_;
430
431							#
432							# The position of failure is exactly the end of the very last lexeme
433							#
434	0						my %lastLexeme = ();
435	0						$self->getLastLexeme(\%lastLexeme, $impl);
436	0						my $rc = $lastLexeme{start} + $lastLexeme{length};
437
438							#
439							# Automatic Semicolon Insertion rules apply here
440							#
441							# 1. When, as the program is parsed from left to right, a token
442							# (called the offending token) is encountered that is not allowed
443							# by any production of the grammar, then a semicolon is automatically
444							# inserted before the offending token if one or more of the following conditions is true:
445							# - The offending token is separated from the previous token by at least one LineTerminator.
446							# - The offending token is }.
447							#
448	0						my $length = 0;
449	0	0					if (($length = $self->_postLineTerminatorLength($source, $rc, $impl)) > 0) {
		0
450	0						$self->_insertSemiColon($impl, $rc, $length);
451	0						$rc += $length;
452							} elsif ($self->_isRcurly($source, $rc, $impl)) {
453	0						$self->_insertSemiColon($impl, $rc, 1);
454							} else {
455	0						SyntaxError();
456							}
457
458	0						return $rc;
459							}
460
461							sub _endCallback {
462	0			0			my ($self, $source, $pos, $max, $impl) = @_;
463
464	0	0					if ($self->{programCompleted}) {
465	0						return;
466							}
467
468							#
469							# Automatic Semicolon Insertion rules apply here
470							#
471							# 2. When, as the program is parsed from left to right, the end of the input stream of tokens
472							# is encountered and the parser is unable to parse the input token stream as a single complete ECMAScript Program
473							# then a semicolon is automatically inserted at the end of the input stream.
474							#
475	0	0					if (! $self->{programCompleted}) {
476	0						my %lastLexeme = ();
477	0						$self->getLastLexeme(\%lastLexeme, $impl);
478	0						my $lastValidPos = $lastLexeme{start} + $lastLexeme{length};
479	0						$self->_insertSemiColon($impl, $lastValidPos, 1);
480	0						my $haveProgramCompletion = grep {$_ eq 'Program$'} map {$_->[0]} @{$impl->events};
	0
	0
	0
481	0	0					if (! $haveProgramCompletion) {
482	0						SyntaxError(error => "Incomplete program");
483							}
484							}
485							}
486
487
488							1;
489
490							=pod
491
492							=encoding utf-8
493
494							=head1 NAME
495
496							MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Program - ECMAScript-262, Edition 5, lexical program grammar written in Marpa BNF
497
498							=head1 VERSION
499
500							version 0.018
501
502							=head1 SYNOPSIS
503
504							use strict;
505							use warnings FATAL => 'all';
506							use MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Program;
507
508							my $grammar = MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Program->new();
509
510							my $grammar_content = $grammar->content();
511							my $grammar_option = $grammar->grammar_option();
512							my $recce_option = $grammar->recce_option();
513
514							=head1 DESCRIPTION
515
516							This modules returns describes the ECMAScript 262, Edition 5 lexical program grammar written in Marpa BNF, as of L. This module inherits the methods from MarpaX::Languages::ECMAScript::AST::Grammar::ECMAScript_262_5::Base package.
517
518							=head1 SUBROUTINES/METHODS
519
520							=head2 make_grammar_content($class)
521
522							Returns the grammar. This will be injected in the Program's grammar.
523
524							=head2 semantics_package($class)
525
526							Class method that returns Program default recce semantics_package. These semantics are adding ruleId to all values, and execute eventually StringLiteral lexical grammar.
527
528							=head2 spacesAny($self, $spacesAny)
529
530							Getter/Setter of a SpacesAny grammar implementation, used internally by the Program grammar.
531
532							=head2 parse($self, $source, $impl)
533
534							Parse the source given as $source using implementation $impl.
535
536							=head1 SEE ALSO
537
538							L
539
540							=head1 AUTHOR
541
542							Jean-Damien Durand
543
544							=head1 COPYRIGHT AND LICENSE
545
546							This software is copyright (c) 2013 by Jean-Damien Durand.
547
548							This is free software; you can redistribute it and/or modify it under
549							the same terms as the Perl 5 programming language system itself.
550
551							=cut
552
553							__DATA__