File Coverage

blib/lib/TOML/Parser/Tokenizer.pm

Criterion	Covered	Total	%
statement	201	204	98.5
branch	74	82	90.2
condition	17	26	65.3
subroutine	19	19	100.0
pod	0	2	0.0
total	311	333	93.3

line	stmt	bran	cond	sub	pod	time	code
1							package TOML::Parser::Tokenizer;
2	20			20		352	use 5.010000;
	20					65
3	20			20		100	use strict;
	20					38
	20					381
4	20			20		97	use warnings;
	20					35
	20					553
5
6	20			20		95	use Exporter 5.57 'import';
	20					242
	20					799
7
8	20	50		20		117	use constant DEBUG => $ENV{TOML_PARSER_TOKENIZER_DEBUG} ? 1 : 0;
	20					36
	20					3106
9
10							BEGIN {
11	20			20		240	my @TOKENS = map uc, qw/
12							comment
13							table
14							array_of_table
15							key
16							integer
17							float
18							boolean
19							datetime
20							string
21							multi_line_string_begin
22							multi_line_string_end
23							inline_table_begin
24							inline_table_end
25							array_begin
26							array_end
27							/;
28							my %CONSTANTS = map {
29	20					51	("TOKEN_$_" => $_)
	300					831
30							} @TOKENS;
31
32	20					126	require constant;
33	20					2429	constant->import(\%CONSTANTS);
34
35							# Exporter
36	20					96	our @EXPORT_OK = keys %CONSTANTS;
37	20					43984	our %EXPORT_TAGS = (
38							constant => [keys %CONSTANTS],
39							);
40							};
41
42							sub grammar_regexp {
43							return +{
44	878			878	0	16834	comment => qr{#(.*)},
45							table => {
46							start => qr{\[},
47							key => qr{(?:"(.*?)(?
48							sep => qr{\.},
49							end => qr{\]},
50							},
51							array_of_table => {
52							start => qr{\[\[},
53							key => qr{(?:"(.*?)(?
54							sep => qr{\.},
55							end => qr{\]\]},
56							},
57							key => qr{(?:"(.*?)(?
58							value => {
59							datetime => qr{([0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:Z\|[-+][0-9]{2}:[0-9]{2}))},
60							float => qr{([-+]?(?:[0-9_]+(?:\.[0-9_]+)?[eE][-+]?[0-9_]+\|[0-9_]*\.[0-9_]+))},
61							integer => qr{([-+]?[0-9_]+)},
62							boolean => qr{(true\|false)},
63							string => qr{(?:"(.*?)(?
64							mlstring => qr{("""\|''')},
65							inline => {
66							start => qr{\{},
67							sep => qr{\s,\s},
68							end => qr{\}},
69							},
70							array => {
71							start => qr{\[},
72							sep => qr{\s,\s},
73							end => qr{\]},
74							},
75							},
76							};
77							}
78
79							sub tokenize {
80	56			56	0	158	my ($class, $src) = @_;
81
82	56					114	local $_ = $src;
83	56					197	return $class->_tokenize();
84							}
85
86							sub _tokenize {
87	56			56		109	my $class = shift;
88	56					175	my $grammar_regexp = $class->grammar_regexp();
89
90	56					127	my @tokens;
91	56					251	until (/\G\z/mgco) {
92	467	100				3231	if (/\G$grammar_regexp->{comment}/mgc) {
		100
		100
		100
		100
93	72					106	warn "[TOKEN] COMMENT: $1" if DEBUG;
94	72					171	$class->_skip_whitespace();
95	72		100			644	push @tokens => [TOKEN_COMMENT, $1 \|\| ''];
96							}
97							elsif (/\G$grammar_regexp->{array_of_table}->{start}/mgc) {
98	23					71	push @tokens => $class->_tokenize_array_of_table();
99							}
100							elsif (/\G$grammar_regexp->{table}->{start}/mgc) {
101	49					137	push @tokens => $class->_tokenize_table();
102							}
103							elsif (my @t = $class->_tokenize_key_and_value()) {
104	225					811	push @tokens => @t;
105							}
106							elsif (/\G\s+/mgco) {
107							# pass through
108	89					193	$class->_skip_whitespace();
109							}
110							else {
111	3					15	$class->_syntax_error();
112							}
113							}
114	36					429	return @tokens;
115							}
116
117							sub _tokenize_key_and_value {
118	369			369		601	my $class = shift;
119	369					703	my $grammar_regexp = $class->grammar_regexp();
120
121	369					557	my @tokens;
122	369	100				3461	if (/\G$grammar_regexp->{key}/mgc) {
123	277		66			1555	my $key = $1 \|\| $2 \|\| $3;
124	277					395	warn "[TOKEN] KEY: $key" if DEBUG;
125	277					700	$class->_skip_whitespace();
126	277					657	push @tokens => [TOKEN_KEY, $key];
127	277					666	push @tokens => $class->_tokenize_value();
128	271					1906	return @tokens;
129							}
130
131	92					766	return;
132							}
133
134							sub _tokenize_value {
135	332			332		470	my $class = shift;
136	332					630	my $grammar_regexp = $class->grammar_regexp();
137	332					494	warn "[CALL] _tokenize_value" if DEBUG;
138
139	332	50				6989	if (/\G$grammar_regexp->{comment}/mgc) {
		100
		100
		100
		100
		100
		100
		100
		100
140	0					0	warn "[TOKEN] COMMENT: $1" if DEBUG;
141	0					0	$class->_skip_whitespace();
142	0		0			0	return [TOKEN_COMMENT, $1 \|\| ''];
143							}
144							elsif (/\G$grammar_regexp->{value}->{datetime}/mgc) {
145	2					4	warn "[TOKEN] DATETIME: $1" if DEBUG;
146	2					5	$class->_skip_whitespace();
147	2					14	return [TOKEN_DATETIME, $1];
148							}
149							elsif (/\G$grammar_regexp->{value}->{float}/mgc) {
150	20					27	warn "[TOKEN] FLOAT: $1" if DEBUG;
151	20					40	$class->_skip_whitespace();
152	20					139	return [TOKEN_FLOAT, $1];
153							}
154							elsif (/\G$grammar_regexp->{value}->{integer}/mgc) {
155	78					109	warn "[TOKEN] INTEGER: $1" if DEBUG;
156	78					173	$class->_skip_whitespace();
157	78					630	return [TOKEN_INTEGER, $1];
158							}
159							elsif (/\G$grammar_regexp->{value}->{boolean}/mgc) {
160	4					9	warn "[TOKEN] BOOLEAN: $1" if DEBUG;
161	4					10	$class->_skip_whitespace();
162	4					30	return [TOKEN_BOOLEAN, $1];
163							}
164							elsif (/\G$grammar_regexp->{value}->{mlstring}/mgc) {
165	21					37	warn "[TOKEN] MULTI LINE STRING: $1" if DEBUG;
166							return (
167	21					72	[TOKEN_MULTI_LINE_STRING_BEGIN],
168							$class->_extract_multi_line_string($1),
169							[TOKEN_MULTI_LINE_STRING_END],
170							);
171							}
172							elsif (/\G$grammar_regexp->{value}->{string}/mgc) {
173	155					265	warn "[TOKEN] STRING: $1" if DEBUG;
174	155					370	$class->_skip_whitespace();
175
176	155					316	my $is_raw = defined $2;
177	155	50				1504	return [TOKEN_STRING, defined $1 ? $1 : defined $2 ? $2 : '', $is_raw];
		100
178							}
179							elsif (/\G$grammar_regexp->{value}->{inline}->{start}/mgc) {
180	17					26	warn "[TOKEN] INLINE TABLE" if DEBUG;
181	17					51	$class->_skip_whitespace();
182							return (
183	17					51	[TOKEN_INLINE_TABLE_BEGIN],
184							$class->_tokenize_inline_table(),
185							[TOKEN_INLINE_TABLE_END],
186							);
187							}
188							elsif (/\G$grammar_regexp->{value}->{array}->{start}/mgc) {
189	32					48	warn "[TOKEN] ARRAY" if DEBUG;
190	32					81	$class->_skip_whitespace();
191							return (
192	32					136	[TOKEN_ARRAY_BEGIN],
193							$class->_tokenize_array(),
194							[TOKEN_ARRAY_END],
195							);
196							}
197
198	3					17	$class->_syntax_error();
199							}
200
201							sub _tokenize_table {
202	49			49		77	my $class = shift;
203
204	49					98	my $grammar_regexp = $class->grammar_regexp()->{table};
205	49					247	warn "[CALL] _tokenize_table" if DEBUG;
206
207	49					127	$class->_skip_whitespace();
208
209	49					110	my @expected = ($grammar_regexp->{key});
210
211	49					75	my @keys;
212							LOOP:
213	49					64	while (1) {
214	157					278	for my $rx (@expected) {
215	201	100				3599	if (/\G$rx/smgc) {
216	151	100				492	if ($rx eq $grammar_regexp->{key}) {
		100
		50
217	76		66			407	my $key = $1 \|\| $2 \|\| $3;
218	76					105	warn "[TOKEN] table key: $key" if DEBUG;
219	76					151	push @keys => $key;
220	76					166	@expected = ($grammar_regexp->{sep}, $grammar_regexp->{end});
221							}
222							elsif ($rx eq $grammar_regexp->{sep}) {
223	32					101	warn "[TOKEN] table key separator" if DEBUG;
224	32					68	@expected = ($grammar_regexp->{key});
225							}
226							elsif ($rx eq $grammar_regexp->{end}) {
227	43					52	warn "[TOKEN] table key end" if DEBUG;
228	43					64	@expected = ();
229	43					101	last LOOP;
230							}
231	108					250	$class->_skip_whitespace();
232	108					250	next LOOP;
233							}
234							}
235
236	6					29	$class->_syntax_error();
237							}
238
239	43					58	warn "[TOKEN] TABLE: @{[ join '.', @keys ]}" if DEBUG;
240	43					249	return [TOKEN_TABLE, \@keys];
241							}
242
243							sub _tokenize_array_of_table {
244	23			23		41	my $class = shift;
245
246	23					55	my $grammar_regexp = $class->grammar_regexp()->{array_of_table};
247	23					120	warn "[CALL] _tokenize_array_of_table" if DEBUG;
248
249	23					99	$class->_skip_whitespace();
250
251	23					51	my @expected = ($grammar_regexp->{key});
252
253	23					34	my @keys;
254							LOOP:
255	23					35	while (1) {
256	57					99	for my $rx (@expected) {
257	75	100				1359	if (/\G$rx/smgc) {
258	52	100				172	if ($rx eq $grammar_regexp->{key}) {
		100
		50
259	26		66			157	my $key = $1 \|\| $2 \|\| $3;
260	26					38	warn "[TOKEN] table key: $key" if DEBUG;
261	26					49	push @keys => $key;
262	26					56	@expected = ($grammar_regexp->{sep}, $grammar_regexp->{end});
263							}
264							elsif ($rx eq $grammar_regexp->{sep}) {
265	8					14	warn "[TOKEN] table key separator" if DEBUG;
266	8					16	@expected = ($grammar_regexp->{key});
267							}
268							elsif ($rx eq $grammar_regexp->{end}) {
269	18					20	warn "[TOKEN] table key end" if DEBUG;
270	18					28	@expected = ();
271	18					32	last LOOP;
272							}
273	34					82	$class->_skip_whitespace();
274	34					83	next LOOP;
275							}
276							}
277
278	5					20	$class->_syntax_error();
279							}
280
281	18					22	warn "[TOKEN] ARRAY_OF_TABLE: @{[ join '.', @keys ]}" if DEBUG;
282	18					102	return [TOKEN_ARRAY_OF_TABLE, \@keys];
283							}
284
285							sub _extract_multi_line_string {
286	21			21		73	my ($class, $delimiter) = @_;
287	21					48	my $is_raw = $delimiter eq q{'''};
288	21	100				211	if (/\G(.+?)\Q$delimiter/smgc) {
289	20					30	warn "[TOKEN] MULTI LINE STRING: $1" if DEBUG;
290	20					46	$class->_skip_whitespace();
291	20					197	return [TOKEN_STRING, $1, $is_raw];
292							}
293	1					6	$class->_syntax_error();
294							}
295
296							sub _tokenize_inline_table {
297	17			17		23	my $class = shift;
298
299	17					36	my $common_grammar_regexp = $class->grammar_regexp();
300	17					35	my $grammar_regexp = $common_grammar_regexp->{value}->{inline};
301
302	17					23	warn "[CALL] _tokenize_inline_table" if DEBUG;
303	17	50				145	return if /\G(?:$grammar_regexp->{sep})?$grammar_regexp->{end}/smgc;
304
305	17					28	my $need_sep = 0;
306
307	17					23	my @tokens;
308	17					26	while (1) {
309	94					109	warn "[CONTEXT] _tokenize_inline_table [loop]" if DEBUG;
310
311	94					237	$class->_skip_whitespace();
312	94	100				433	if (/\G$common_grammar_regexp->{comment}/mgc) {
		100
313	2					4	warn "[TOKEN] COMMENT: $1" if DEBUG;
314	2		50			11	push @tokens => [TOKEN_COMMENT, $1 \|\| ''];
315	2					4	next;
316							}
317							elsif (/\G$grammar_regexp->{end}/mgc) {
318	16					29	last;
319							}
320
321	76	100				132	if ($need_sep) {
322	30	100				156	if (/\G$grammar_regexp->{sep}/smgc) {
323	29					47	$need_sep = 0;
324	29					53	next;
325							}
326							}
327							else {
328	46	50				97	if (my @t = $class->_tokenize_key_and_value()) {
329	46					76	push @tokens => @t;
330	46					54	$need_sep = 1;
331	46					80	next;
332							}
333							}
334
335	1					6	$class->_syntax_error();
336							}
337
338	16					183	return @tokens;
339							}
340
341							sub _tokenize_array {
342	32			32		50	my $class = shift;
343
344	32					70	my $common_grammar_regexp = $class->grammar_regexp();
345	32					63	my $grammar_regexp = $common_grammar_regexp->{value}->{array};
346
347	32					40	warn "[CALL] _tokenize_array" if DEBUG;
348	32	100				334	return if /\G(?:$grammar_regexp->{sep})?$grammar_regexp->{end}/smgc;
349
350	30					54	my $need_sep = 0;
351
352	30					44	my @tokens;
353	30					38	while (1) {
354	126					171	warn "[CONTEXT] _tokenize_inline_table [loop]" if DEBUG;
355
356	126					282	$class->_skip_whitespace();
357	126	100				608	if (/\G$common_grammar_regexp->{comment}/mgc) {
		100
358	7					11	warn "[TOKEN] COMMENT: $1" if DEBUG;
359	7		50			29	push @tokens => [TOKEN_COMMENT, $1 \|\| ''];
360	7					36	next;
361							}
362							elsif (/\G$grammar_regexp->{end}/mgc) {
363	29					51	last;
364							}
365
366	90	100				150	if ($need_sep) {
367	35	100				210	if (/\G$grammar_regexp->{sep}/smgc) {
368	34					58	$need_sep = 0;
369	34					63	next;
370							}
371							}
372							else {
373	55	50				146	if (my @t = $class->_tokenize_value()) {
374	55					155	push @tokens => @t;
375	55					76	$need_sep = 1;
376	55					106	next;
377							}
378							}
379
380	1					4	$class->_syntax_error();
381							}
382
383	29					349	return @tokens;
384							}
385
386							sub _skip_whitespace {
387	1200			1200		1676	my $class = shift;
388	1200	100				3101	if (/\G\s+/smgco) {
389							# pass through
390	610					933	warn "[PASS] WHITESPACE" if DEBUG;
391							}
392							}
393
394	20			20		72	sub _syntax_error { shift->_error('Syntax Error') }
395
396							sub _error {
397	20			20		62	my ($class, $msg) = @_;
398
399	20					52	my $src = $_;
400	20		100			75	my $curr = pos \|\| 0;
401	20					44	my $line = 1;
402	20		50			91	my $start = pos $src \|\| 0;
403	20		66			154	while ($src =~ /$/smgco and pos $src <= $curr) {
404	28					47	$start = pos $src;
405	28					124	$line++;
406							}
407	20					50	my $end = pos $src;
408	20					45	my $len = $curr - $start;
409	20	100				60	$len-- if $len > 0;
410
411	20		100			152	my $trace = join "\n",
412							"${msg}: line:$line",
413							substr($src, $start \|\| 0, $end - $start),
414							(' ' x $len) . '^';
415	20					484	die $trace, "\n";
416							}
417
418							1;
419							__END__