File Coverage

blib/lib/WAP/wmls/lexer.pm

Criterion	Covered	Total	%
statement	12	161	7.4
branch	0	204	0.0
condition	0	25	0.0
subroutine	4	13	30.7
pod	0	2	0.0
total	16	405	3.9

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# WMLScript Language Specification Version 1.1
3							#
4							# Lexer module
5							#
6
7							package WAP::wmls::lexer;
8
9	1			1		119448	use strict;
	1					3
	1					35
10	1			1		6	use warnings;
	1					1
	1					25
11	1			1		5	use bigint;
	1					2
	1					9
12	1			1		1279	use bignum;
	1					3
	1					9
13
14							sub _DoubleStringLexer {
15	0			0			my ($parser) = @_;
16	0						my $str = q{};
17	0						my $type = 'STRING_LITERAL';
18
19	0						while ($parser->YYData->{INPUT}) {
20
21	0						for ($parser->YYData->{INPUT}) {
22
23	0	0					s/^\"//
24							and return ($type, $str);
25
26	0	0					s/^([^"\\]+)//
27							and $str .= $1,
28							last;
29
30	0	0					s/^\\(['"\\\/])//
31							and $str .= $1, # single quote, double quote, backslash, slash
32							last;
33	0	0					s/^\\b//
34							and $str .= "\b", # backspace
35							last;
36	0	0					s/^\\f//
37							and $str .= "\f", # form feed
38							last;
39	0	0					s/^\\n//
40							and $str .= "\n", # new line
41							last;
42	0	0					s/^\\r//
43							and $str .= "\r", # carriage return
44							last;
45	0	0					s/^\\t//
46							and $str .= "\t", # horizontal tab
47							last;
48	0	0					if ($type eq 'UTF8_STRING_LITERAL') {
49	0	0					s/^\\([0-7]{1,2})//
50							and $str .= chr oct $1,
51							last;
52	0	0					s/^\\([0-3][0-7]{2})//
53							and $str .= chr oct $1,
54							last;
55	0	0					s/^\\x([0-9A-Fa-f]{2})//
56							and $str .= chr hex $1,
57							last;
58							}
59							else {
60	0	0					if ($parser->YYData->{encoding} eq 'iso-8859-1') {
61	0	0					s/^\\([0-7]{1,2})//
62							and $str .= chr oct $1,
63							last;
64	0	0					s/^\\([0-3][0-7]{2})//
65							and $str .= chr oct $1,
66							last;
67	0	0					s/^\\x([0-9A-Fa-f]{2})//
68							and $str .= chr hex $1,
69							last;
70							}
71							else {
72	0	0	0				s/^\\([0-7]{1,2})//
73							and $type = 'UTF8_STRING_LITERAL',
74							and $str .= chr oct $1,
75							last;
76	0	0	0				s/^\\([0-3][0-7]{2})//
77							and $type = 'UTF8_STRING_LITERAL',
78							and $str .= chr oct $1,
79							last;
80	0	0	0				s/^\\x([0-9A-Fa-f]{2})//
81							and $type = 'UTF8_STRING_LITERAL',
82							and $str .= chr hex $1,
83							last;
84							}
85							}
86	0	0					if ($type eq 'UTF8_STRING_LITERAL') {
87	0	0					s/^\\u([0-9A-Fa-f]{4})//
88							and $str .= pack('U', hex $1),
89							last;
90							}
91							else {
92	0	0	0				s/^\\u([0-9A-Fa-f]{4})//
93							and $type = 'UTF8_STRING_LITERAL',
94							and $str .= pack('U', hex $1),
95							last;
96							}
97	0	0					s/^\\//
98							and $parser->Error("Invalid escape sequence $_ .\n"),
99							last;
100							}
101							}
102
103	0						$parser->Error("Untermined string.\n");
104	0						$parser->YYData->{lineno} ++;
105	0						return ($type, $str);
106							}
107
108							sub _SingleStringLexer {
109	0			0			my ($parser) = @_;
110	0						my $str = q{};
111	0						my $type = 'STRING_LITERAL';
112
113	0						while ($parser->YYData->{INPUT}) {
114
115	0						for ($parser->YYData->{INPUT}) {
116
117	0	0					s/^'//
118							and return ($type, $str);
119
120	0	0					s/^([^'\\]+)//
121							and $str .= $1,
122							last;
123
124	0	0					s/^\\(['"\\\/])//
125							and $str .= $1, # single quote, double quote, backslash, slash
126							last;
127	0	0					s/^\\b//
128							and $str .= "\b", # backspace
129							last;
130	0	0					s/^\\f//
131							and $str .= "\f", # form feed
132							last;
133	0	0					s/^\\n//
134							and $str .= "\n", # new line
135							last;
136	0	0					s/^\\r//
137							and $str .= "\r", # carriage return
138							last;
139	0	0					s/^\\t//
140							and $str .= "\t", # horizontal tab
141							last;
142	0	0					if ($type eq 'UTF8_STRING_LITERAL') {
143	0	0					s/^\\([0-7]{1,2})//
144							and $str .= chr oct $1,
145							last;
146	0	0					s/^\\([0-3][0-7]{2})//
147							and $str .= chr oct $1,
148							last;
149	0	0					s/^\\x([0-9A-Fa-f]{2})//
150							and $str .= chr hex $1,
151							last;
152							}
153							else {
154	0	0					if ($parser->YYData->{encoding} eq 'iso-8859-1') {
155	0	0					s/^\\([0-7]{1,2})//
156							and $str .= chr oct $1,
157							last;
158	0	0					s/^\\([0-3][0-7]{2})//
159							and $str .= chr oct $1,
160							last;
161	0	0					s/^\\x([0-9A-Fa-f]{2})//
162							and $str .= chr hex $1,
163							last;
164							}
165							else {
166	0	0	0				s/^\\([0-7]{1,2})//
167							and $type = 'UTF8_STRING_LITERAL',
168							and $str .= chr oct $1,
169							last;
170	0	0	0				s/^\\([0-3][0-7]{2})//
171							and $type = 'UTF8_STRING_LITERAL',
172							and $str .= chr oct $1,
173							last;
174	0	0	0				s/^\\x([0-9A-Fa-f]{2})//
175							and $type = 'UTF8_STRING_LITERAL',
176							and $str .= chr hex $1,
177							last;
178							}
179							}
180	0	0					if ($type eq 'UTF8_STRING_LITERAL') {
181	0	0					s/^\\u([0-9A-Fa-f]{4})//
182							and $str .= pack('U', hex $1),
183							last;
184							}
185							else {
186	0	0	0				s/^\\u([0-9A-Fa-f]{4})//
187							and $type = 'UTF8_STRING_LITERAL',
188							and $str .= pack('U', hex $1),
189							last;
190							}
191	0	0					s/^\\//
192							and $parser->Error("Invalid escape sequence $_ .\n"),
193							last;
194							}
195							}
196
197	0						$parser->Error("Untermined string.\n");
198	0						$parser->YYData->{lineno} ++;
199	0						return ($type, $str);
200							}
201
202							sub _Identifier {
203	0			0			my ($parser, $ident) = @_;
204
205	0	0					if (exists $parser->YYData->{keyword}{$ident}) {
		0
206	0						return ($parser->YYData->{keyword}{$ident}, $ident);
207							}
208							elsif (exists $parser->YYData->{invalid_keyword}{$ident}) {
209	0						$parser->Error("Invalid keyword '$ident'.\n");
210							}
211	0						return ('IDENTIFIER', $ident);
212							}
213
214							sub _OctInteger {
215	0			0			my ($parser, $str) = @_;
216
217	0						my $val = 0;
218	0						foreach (split //, $str) {
219	0						$val = $val * 8 + oct $_;
220							}
221	0						return ('INTEGER_LITERAL', $val);
222							}
223
224							sub _HexInteger {
225	0			0			my ($parser, $str) = @_;
226
227	0						my $val = 0;
228	0						foreach (split //, $str) {
229	0						$val = $val * 16 + hex $_;
230							}
231	0						return ('INTEGER_LITERAL', $val);
232							}
233
234							sub _CommentLexer {
235	0			0			my ($parser) = @_;
236
237	0						while (1) {
238	0	0	0				$parser->YYData->{INPUT}
239							or $parser->YYData->{INPUT} = readline $parser->YYData->{fh}
240							or return;
241
242	0						for ($parser->YYData->{INPUT}) {
243	0	0					s/^\n//
244							and $parser->YYData->{lineno} ++,
245							last;
246	0	0					s/^\*\///
247							and return;
248	0	0					s/^.//
249							and last;
250							}
251							}
252							}
253
254							sub _DocLexer {
255	0			0			my ($parser) = @_;
256
257	0						$parser->YYData->{doc} = q{};
258	0						my $flag = 1;
259	0						while (1) {
260	0	0	0				$parser->YYData->{INPUT}
261							or $parser->YYData->{INPUT} = readline $parser->YYData->{fh}
262							or return;
263
264	0						for ($parser->YYData->{INPUT}) {
265	0	0					s/^(\n)//
266							and $parser->YYData->{lineno} ++,
267							$parser->YYData->{doc} .= $1,
268							$flag = 0,
269							last;
270	0	0					s/^\*\///
271							and return;
272	0	0					unless ($flag) {
273	0	0					s/^\*//
274							and $flag = 1,
275							last;
276							}
277							s/^([ \r\t\f\013]+)//
278	0	0					and $parser->YYData->{doc} .= $1,
279							last;
280	0	0					s/^(.)//
281							and $parser->YYData->{doc} .= $1,
282							$flag = 1,
283							last;
284							}
285							}
286							}
287
288							sub Lexer {
289	0			0	0		my ($parser) = @_;
290
291	0						while (1) {
292	0	0	0				$parser->YYData->{INPUT}
293							or $parser->YYData->{INPUT} = readline $parser->YYData->{fh}
294							or return ('', undef);
295
296	0						for ($parser->YYData->{INPUT}) {
297
298	0						s/^[ \r\t\f\013]+//; # Whitespace
299	0	0					s/^\n//
300							and $parser->YYData->{lineno} ++,
301							last;
302
303	0	0					s/^\/\\// # documentation
304							and _DocLexer($parser),
305							last;
306
307	0	0					s/^\/\*// # MultiLineComment
308							and _CommentLexer($parser),
309							last;
310	0	0					s/^\/\/(.*)\n// # SingleLineComment
311							and $parser->YYData->{lineno} ++,
312							last;
313
314	0	0					s/^([0-9]+\.[0-9]+[Ee][+\-]?[0-9]+)//
315							and return ('FLOAT_LITERAL', $1);
316	0	0					s/^([0-9]+[Ee][+\-]?[0-9]+)//
317							and return ('FLOAT_LITERAL', $1);
318	0	0					s/^(\.[0-9]+[Ee][+\-]?[0-9]+)//
319							and return ('FLOAT_LITERAL', $1);
320	0	0					s/^([0-9]+\.[0-9]+)//
321							and return ('FLOAT_LITERAL', $1);
322	0	0					s/^([0-9]+\.)//
323							and return ('FLOAT_LITERAL', $1);
324	0	0					s/^(\.[0-9]+)//
325							and return ('FLOAT_LITERAL', $1);
326
327	0	0					s/^0([0-7]+)//
328							and return _OctInteger($parser, $1);
329	0	0					s/^0[Xx]([A-Fa-f0-9]+)//
330							and return _HexInteger($parser, $1);
331	0	0					s/^(0)//
332							and return ('INTEGER_LITERAL', $1);
333	0	0					s/^([1-9][0-9]*)//
334							and return ('INTEGER_LITERAL', $1);
335
336	0	0					s/^\"//
337							and return _DoubleStringLexer($parser);
338
339	0	0					s/^\'//
340							and return _SingleStringLexer($parser);
341
342	0	0					s/^([A-Z_a-z][0-9A-Z_a-z]*)//
343							and return _Identifier($parser, $1);
344
345	0	0					s/^(\+=)//
346							and return ($1, $1);
347	0	0					s/^(\-=)//
348							and return ($1, $1);
349	0	0					s/^(\*=)//
350							and return ($1, $1);
351	0	0					s/^(\/=)//
352							and return ($1, $1);
353	0	0					s/^(&=)//
354							and return ($1, $1);
355	0	0					s/^(\\|=)//
356							and return ($1, $1);
357	0	0					s/^(\^=)//
358							and return ($1, $1);
359	0	0					s/^(%=)//
360							and return ($1, $1);
361	0	0					s/^(<<=)//
362							and return ($1, $1);
363	0	0					s/^(>>=)//
364							and return ($1, $1);
365	0	0					s/^(>>>=)//
366							and return ($1, $1);
367	0	0					s/^(div=)//
368							and return ($1, $1);
369	0	0					s/^(&&)//
370							and return ($1, $1);
371	0	0					s/^(\\|\\|)//
372							and return ($1, $1);
373	0	0					s/^(\+\+)//
374							and return ($1, $1);
375	0	0					s/^(\-\-)//
376							and return ($1, $1);
377	0	0					s/^(<<)//
378							and return ($1, $1);
379	0	0					s/^(>>>)//
380							and return ($1, $1);
381	0	0					s/^(>>)//
382							and return ($1, $1);
383	0	0					s/^(<=)//
384							and return ($1, $1);
385	0	0					s/^(>=)//
386							and return ($1, $1);
387	0	0					s/^(==)//
388							and return ($1, $1);
389	0	0					s/^(!=)//
390							and return ($1, $1);
391
392	0	0					s/^([=><,!~\?:\.\+\-\*\/&\\|\^%\{\};#])//
393							and return ($1, $1); # punctuator
394
395	0	0					s/^([\S]+)//
396							and $parser->Error("lexer error $1.\n"),
397							last;
398							}
399							}
400							}
401
402							sub InitLexico {
403	0			0	0		my ($parser) = @_;
404
405	0						my %keywords = (
406							# Literal
407							'true' => 'TRUE_LITERAL',
408							'false' => 'FALSE_LITERAL',
409							'invalid' => 'INVALID_LITERAL',
410							# Keyword
411							'access' => 'ACCESS',
412							'agent' => 'AGENT',
413							'break' => 'BREAK',
414							'continue' => 'CONTINUE',
415							'div' => 'DIV',
416							'domain' => 'DOMAIN',
417							'else' => 'ELSE',
418							'equiv' => 'EQUIV',
419							'extern' => 'EXTERN',
420							'for' => 'FOR',
421							'function' => 'FUNCTION',
422							'header' => 'HEADER',
423							'http' => 'HTTP',
424							'if' => 'IF',
425							'isvalid' => 'ISVALID',
426							'meta' => 'META',
427							'name' => 'NAME',
428							'path' => 'PATH',
429							'return' => 'RETURN',
430							'typeof' => 'TYPEOF',
431							'use' => 'USE',
432							'user' => 'USER',
433							'var' => 'VAR',
434							'while' => 'WHILE',
435							'url' => 'URL',
436							);
437	0						my %invalid_keywords = (
438							# Keyword not used
439							'delete' => 'DELETE',
440							'in' => 'IN',
441							'lib' => 'LIB',
442							'new' => 'NEW',
443							'null' => 'NULL',
444							'this' => 'THIS',
445							'void' => 'VOID',
446							'with' => 'WITH',
447							# Future reserved word
448							'case' => 'CASE',
449							'catch' => 'CATCH',
450							'class' => 'CLASS',
451							'const' => 'CONST',
452							'debugger' => 'DEBUGGER',
453							'default' => 'DEFAULT',
454							'do' => 'DO',
455							'enum' => 'ENUM',
456							'export' => 'EXPORT',
457							'extends' => 'EXTENDS',
458							'finally' => 'FINALLY',
459							'import' => 'IMPORT',
460							'private' => 'PRIVATE',
461							'public' => 'PUBLIC',
462							'sizeof' => 'SIZEOF',
463							'struct' => 'STRUCT',
464							'super' => 'SUPER',
465							'switch' => 'SWITCH',
466							'throw' => 'THROW',
467							'try' => 'TRY',
468							);
469
470	0						$parser->YYData->{keyword} = \%keywords;
471	0						$parser->YYData->{invalid_keyword} = \%invalid_keywords;
472	0						return;
473							}
474
475							1;
476