File Coverage

blib/lib/Decl/DefaultParsers.pm

Criterion	Covered	Total	%
statement	254	260	97.6
branch	73	80	91.2
condition	2	3	66.6
subroutine	19	19	100.0
pod	6	6	100.0
total	354	368	96.2

line	stmt	bran	cond	sub	pod	time	code
1							package Decl::DefaultParsers;
2
3	12			12		77	use warnings;
	12					23
	12					447
4	12			12		65	use strict;
	12					26
	12					384
5	12			12		74	use Decl::Parser;
	12					27
	12					241
6	12			12		71	use Decl::Util;
	12					24
	12					1021
7	12			12		78	use Decl::Node;
	12					27
	12					398
8	12			12		71	use Data::Dumper;
	12					35
	12					41898
9
10
11							=head1 NAME
12
13							Decl::DefaultParsers - implements the default parsers for the Declarative language.
14
15							=head1 VERSION
16
17							Version 0.01
18
19							=cut
20
21							our $VERSION = '0.01';
22
23
24							=head1 SYNOPSIS
25
26							This isn't really an object module; it's just a convenient place to stash the default parsers we use, in order to make it easier to work with the Decl code.
27
28							=head2 init_default_line_parser(), init_default_body_parser(), init_locator_parser(), including locally defined is_blank, is_blank_or_comment, and line_indentation
29
30							These are called by C to initialize our various sublanguage parsers. You don't need to call them.
31
32							=cut
33
34							sub init_default_line_parser {
35	12			12	1	28	my ($self) = @_;
36
37							# Default line parser.
38	12					119	my $p = Decl::Parser->new();
39
40	12					75	$p->add_tokenizer ('CODEBLOCK'); # TODO: parameterizable, perhaps.
41							$p->add_tokenizer ('STRING', "'(?:\\.\|[^'])'\|\"(?:\\.\|[^\"])\"",
42							sub {
43	105			105		245	my $s = shift;
44	105					475	$s =~ s/.//;
45	105					499	$s =~ s/.$//;
46	105					251	$s =~ s/\\(['"])/$1/g;
47	105					217	$s =~ s/\\\\/\\/g;
48	105					223	$s =~ s/\\n/\n/g;
49	105					201	$s =~ s/\\t/\t/g;
50	105					841	['STRING', $s]
51	12					95	}); # TODO: this should be globally available.
52	12					54	$p->add_tokenizer ('BRACKET', '{.*');
53	12					71	$p->add_tokenizer ('COMMENT', '#.*');
54	12					74	$p->add_tokenizer ('WHITESPACE*', '\s+');
55	12					52	$p->add_tokenizer ('EQUALS', '=');
56	12					42	$p->add_tokenizer ('COMMA', ',');
57	12					45	$p->add_tokenizer ('LPAREN', '\(');
58	12					42	$p->add_tokenizer ('RPAREN', '\)');
59	12					45	$p->add_tokenizer ('LBRACK', '\[');
60	12					45	$p->add_tokenizer ('RBRACK', '\]');
61	12					46	$p->add_tokenizer ('LT', '<');
62
63	12					70	$p->add_rule ('line', 'p_and(optional(), optional(), optional(), optional (), optional(), optional(`), optional(), optional(), \&end_of_input)');`
64	12					48	$p->add_rule ('name', 'one_or_more(\&word)');
65	12					45	$p->add_rule ('parmlist', 'p_and(token_silent(["LPAREN"]), list_of(, "COMMA*"), token_silent(["RPAREN"]))');
66	12					52	$p->add_rule ('parm', 'p_or(, one_or_more(\&word))');
67	12					52	$p->add_rule ('parmval', 'p_and(\&word, token_silent(["EQUALS"]), )');
68	12					49	$p->add_rule ('value', 'p_or(\&word, token(["STRING"]))');
69	12					48	$p->add_rule ('optionlist', 'p_and(token_silent(["LBRACK"]), list_of(, "COMMA*"), token_silent(["RBRACK"]))');
70	12					45	$p->add_rule ('label', 'token(["STRING"])');
71	12					47	$p->add_rule ('parser', 'p_and(\&word, token_silent(["LT"]))');
72	12					46	$p->add_rule ('code', 'token(["CODEBLOCK"])');
73	12					51	$p->add_rule ('bracket', 'token(["BRACKET"])');
74	12					49	$p->add_rule ('comment', 'token(["COMMENT"])');
75
76							$p->action ('input', sub {
77	977			977		1662	my ($parser, $node, $input) = @_;
78	977	50				2866	if (not ref $node) {
79	0	0				0	$node = 'tag' unless defined $node;
80	0					0	$node = Decl::Node->new($node);
81							}
82	977					2397	$parser->{user}->{node} = $node;
83	977	100				4956	$input = $node->line() unless $input;
84	12					105	});
85							$p->action ('output', sub {
86	977			977		1657	my ($parse_result, $parser) = @_;
87	977					2754	my $node = $parser->{user}->{node};
88	977	100	66			4728	if (defined $parse_result and car($parse_result) eq 'line') {
89	975					14732	foreach my $piece (@{$parse_result->[1]}) {
	975					2823
90	366	100				1097	if (car($piece) eq 'name') {
		100
		100
		100
91	88					209	my @names = map { cdr $_ } @{cdr($piece)};
	106					300
	88					468
92	88					333	$node->{name} = $names[0];
93	88					565	$node->{namelist} = \@names;
94							} elsif (car($piece) eq 'parmlist') {
95	165					404	my @parmlist = ();
96	165					256	foreach my $parm (@{cdr($piece)}) {
	165					511
97	200					570	my $value = cdr($parm);
98	200	100				583	if (car($value) eq 'parmval') {
99	188					592	my $parameter = cdr(car(cdr($value)));
100	188					593	my $val = cdr(cdr(cdr(cdr($value))));
101	188					509	push @parmlist, $parameter;
102	188					1084	$node->{parameters}->{$parameter} = $val;
103							} else {
104	12					35	my @words = map { cdr $_ } @$value;
	12					37
105	12					42	my $parameter = join ' ', @words;
106	12					26	push @parmlist, $parameter;
107	12					66	$node->{parameters}->{$parameter} = 'yes';
108							}
109							}
110	165					822	$node->{parmlist} = \@parmlist;
111							} elsif (car($piece) eq 'optionlist') {
112	6					16	my @parmlist = ();
113	6					16	foreach my $parm (@{cdr($piece)}) {
	6					24
114	13					45	my $value = cdr($parm);
115	13	100				44	if (car($value) eq 'parmval') {
116	1					4	my $parameter = cdr(car(cdr($value)));
117	1					26	my $val = cdr(cdr(cdr(cdr($value))));
118	1					4	push @parmlist, $parameter;
119	1					6	$node->{options}->{$parameter} = $val;
120							} else {
121	12					27	my @words = map { cdr $_ } @$value;
	15					39
122	12					35	my $parameter = join ' ', @words;
123	12					26	push @parmlist, $parameter;
124	12					61	$node->{options}->{$parameter} = 'yes';
125							}
126							}
127	6					26	$node->{optionlist} = \@parmlist;
128							} elsif (car($piece) eq 'parser') {
129	1					4	$node->{parser} = cdr car cdr $piece;
130							} else {
131	106					306	$node->{car($piece)} = cdr(cdr($piece)); # Elegance! We likes it, precioussss.
132							}
133							}
134							}
135	977					6581	return $node;
136	12					141	});
137
138	12					59	$p->build();
139	12					74	return $p;
140							}
141
142							sub init_default_body_parser {
143	12			12	1	45	my ($self) = @_;
144
145							# Default body parser.
146	12					73	my $p = Decl::Parser->new();
147
148	12					73	$p->add_tokenizer ('BLANKLINE', '\n\n+');
149	12					61	$p->add_tokenizer ('NEWLINE*', '\n');
150	12					55	$p->add_rule ('body', 'series(p_or(\&word, token("BLANKLINE")))');
151							$p->action ('input', sub {
152	772			772		1600	my ($parser, $context, $parent, $input) = @_;
153	772					3180	$input
154	12					97	});
155							$p->action ('output', sub {
156	772			772		1572	my ($parse_result, $parser, $context, $parent, $input) = @_;
157	772					1442	my @results = ();
158	772					1148	my @nodes_made = ();
159	772					2845	my $root = $parent->root();
160	772	50				3412	return () unless popcar($parse_result) eq 'body';
161	772					1440	my $indent = 0;
162	772					1089	my $lineindent = 0;
163	772					1021	my $thisindent = 0;
164	772					1357	my $curtext = '';
165	772					1041	my $tag = '';
166	772					1007	my $blanks = '';
167	772					993	my $firstcode = '';
168	772					920	my $rest;
169	772					1047	my $spaces = '';
170	772					1190	my $bracket = '';
171
172							my $starttag = sub {
173	965					1605	my ($line) = @_;
174	965	50				3100	if ($line =~ /^(\s+)/) {
175	0					0	$lineindent = length ($1);
176	0					0	$line =~ s/^\s*//; # Discard any indentation before the tag line
177							} else {
178	965					1495	$lineindent = 0;
179							}
180	965	100				2194	if ($curtext) {
181	18					45	push @results, $curtext;
182							}
183	965					1783	$curtext = $line . "\n";
184	965					2622	($tag, $rest) = split /\s+/, $line, 2;
185	965					3694	$indent = 0;
186	772					4506	};
187
188							my $concludetag = sub {
189							# print STDERR "---- concludetag: $tag\n";
190	965					3986	my $newnode = $context->makenode($parent, $tag, $curtext);
191	965					1857	$newnode->{parent} = $parent;
192	965					1537	push @results, $newnode;
193	965					1411	push @nodes_made, $newnode;
194	965					1422	$tag = '';
195	965					1254	$curtext = '';
196	965					2042	$indent = 0;
197	772					3523	};
198	1407			1407	1	9327	sub is_blank { $_[0] =~ /^(\s\|\n)*$/ };
199							sub is_blank_or_comment {
200	991			991	1	1846	$_ = shift;
201	991	100				4980	/^\s*#/ \|\| is_blank ($_)
202							};
203							sub line_indentation {
204	419	100		419	1	1322	if ($_[0] =~ /^(\s+)/) {
205	189					777	length($1)
206							} else {
207	230					522	0
208							}
209							}
210
211							# print STDERR "\n\n----- Starting " . $parent->tag . " with:\n$input-----------------------\n";
212	772					2518	foreach (@$parse_result) {
213	1231					4191	my ($type, $line) = splitcar ($_);
214	1231					2500	my $testline = $line;
215	1231					2522	$testline =~ s/\n/\\n/g;
216							# print STDERR "$testline : ";
217	1231	100				2792	$line =~ s/\n*// if $type; # If we have a BLANKLINE token, there are one too many \n's in there.
218	1231	100				2429	if (not $tag) { # We're in a blank-and-comment stretch
219	812	100				2305	if (is_blank_or_comment($line)) {
220							# print STDERR "blank-or-comment\n";
221	25					101	$curtext .= $line . "\n";
222							} else {
223							# print STDERR "start tag\n";
224	787					2188	$starttag->($line);
225							}
226							} else { # We're in a tag
227	419	100				885	if (not $indent) { # We just started it, though.
228	248					597	$indent = line_indentation($line);
229	248	100				679	if ($indent <= $lineindent) { # And the first line after the starting line is already back-indented!
		100
230	173	100				338	if (is_blank($line)) { # This is a blank line, though, so it may not count as indented.
231							# print STDERR "blank line at start of tag\n";
232	16					42	$blanks .= $line; # We'll stash it and try again.
233	16					68	$indent = 0;
234							} else { # It's not a blank; it's either a new tag, or a comment.
235	157					325	$concludetag->();
236	157	50				391	if (is_blank_or_comment($line)) {
237							# print STDERR "blank-or-comment\n";
238	0					0	$curtext = $blanks . $line . "\n";
239	0					0	$blanks = '';
240							} else {
241	157	100				402	if ($blanks) {
242							# print STDERR "(had some leftover blanks) ";
243	3					8	push @results, $blanks;
244	3					10	$blanks = '';
245							}
246							# print STDERR ("starting new tag\n");
247	157					339	$starttag->($line);
248							}
249							}
250							} elsif (is_blank ($line)) {
251							# print STDERR "blank line at start of tag with longer indent\n";
252	6					16	$blanks .= $line; # Stash it and keep going.
253	6					34	$indent = $lineindent; # 2010-07-24 - and don't let 'indent' get updated
254							} else { # This is the first line of the body, because it's indented further than the opening line.
255	69					281	$spaces = ' ' x $indent;
256	69					721	$line =~ s/^$spaces//;
257	69	100				250	if ($blanks) {
258							# print STDERR "(had blanks) ";
259	1					3	$curtext .= $blanks;
260	1					2	$blanks = '';
261							}
262							# print STDERR "first line of body\n";
263	69					346	$curtext .= $line . "\n";
264							}
265							} else {
266	171	100				340	if (line_indentation ($line) < $indent) { # A new back-indentation!
		100
267	65	100				161	if (is_blank($line)) { # If this is blank, we don't add it to the body until there's more to add.
		100
		100
268							# print STDERR ("stash blank line\n");
269	18					83	$blanks .= $line . "\n";
270							} elsif ($line =~ /^\s*}/) { # Closing bracket; we don't check for matching brackets; the closing bracket is really just a sort of comment.
271							# print STDERR ("closing bracket\n");
272	25					85	$concludetag->();
273							} elsif (is_blank_or_comment($line)) { # Comment; this by definition belongs to the parent.
274							# print STDERR ("back-indented comment, denoting end of last tag\n");
275	1					4	$concludetag->();
276	1					3	$curtext = $blanks . $line . "\n";
277	1					5	$blanks = '';
278							} else { # Next tag line.
279	21					58	$concludetag->();
280	21	100				70	if ($blanks) {
281							# print STDERR "(had some blanks) ";
282	14					39	push @results, $blanks;
283	14					30	$blanks = '';
284							}
285							# print STDERR "starting tag!\n";
286	21					65	$starttag->($line);
287							}
288							} elsif (is_blank ($line)) { # This blank line may fall between nodes, or be part of the current one.
289							# print STDERR "stash blank line within body\n";
290	15					83	$blanks .= $line . "\n";
291							} else { # Normal body line; toss it into the mix.
292	91					518	$line =~ s/^$spaces//;
293	91	100				246	if ($blanks) { # If we've stashed some blanks, add them back.
294							# print STDERR "(had some blanks) ";
295	8					13	$curtext .= $blanks;
296	8					15	$blanks = '';
297							}
298							# print STDERR "body line >> $line\n";
299	91					352	$curtext .= $line . "\n";
300							}
301							}
302							}
303							}
304	772	100				1936	if ($curtext) {
305	767	100				1481	if ($tag) {
306							# print STDERR "FINAL: had a tag\n";
307	761					1577	$concludetag->();
308							} else {
309							# print STDERR "FINAL: extra text\n";
310	6					19	push @results, $curtext;
311							}
312							}
313	772	100				1735	if ($blanks) {
314							# print STDERR "FINAL: extra blanks\n";
315	12					34	push @results, $blanks;
316							}
317	772					2855	$parent->{elements} = [$parent->elements, @results];
318							@nodes_made
319	12					137	});
	772					12054
320
321	12					45	$p->build(); # Forgetting this cost me several hours of debugging...
322	12					52	return $p;
323							}
324
325							sub init_locator_parser {
326	12			12	1	29	my ($self) = @_;
327
328	12					69	my $p = Decl::Parser->new();
329
330							$p->add_tokenizer ('STRING', "'(?:\\.\|[^'])'\|\"(?:\\.\|[^\"])\"",
331							sub {
332	4			4		10	my $s = shift;
333	4					19	$s =~ s/.//;
334	4					16	$s =~ s/.$//;
335	4					11	$s =~ s/\\(['"])/$1/g;
336	4					7	$s =~ s/\\\\/\\/g;
337	4					10	$s =~ s/\\n/\\n/g;
338	4					6	$s =~ s/\\t/\\t/g;
339	4					37	['STRING', $s]
340	12					102	});
341	12					59	$p->add_tokenizer ('WHITESPACE*', '\s+');
342	12					66	$p->add_tokenizer ('MATCHES', '=~');
343	12					116	$p->add_tokenizer ('EQUALS', '=');
344	12					73	$p->add_tokenizer ('SEPARATOR', '[.:/]');
345	12					56	$p->add_tokenizer ('LPAREN', '\(');
346	12					45	$p->add_tokenizer ('RPAREN', '\)');
347	12					48	$p->add_tokenizer ('LBRACK', '\[');
348	12					50	$p->add_tokenizer ('RBRACK', '\]');
349
350	12					55	$p->add_rule ('locator', 'list_of(, "SEPARATOR*")');
351	12					46	$p->add_rule ('tag', 'p_and(\&word, p_or (, , , , \¬hing))');
352	12					52	$p->add_rule ('name', 'p_and(token_silent(["LBRACK"]), one_or_more(\&word), token_silent(["RBRACK"]))');
353	12					54	$p->add_rule ('attribute', 'p_and(token_silent(["LBRACK"]), \&word, token_silent(["EQUALS"]), p_or(\&word, token (["STRING"])), token_silent(["RBRACK"]))');
354	12					51	$p->add_rule ('match', 'p_and(token_silent(["LBRACK"]), \&word, token_silent(["MATCHES"]), p_or(\&word, token (["STRING"])), token_silent(["RBRACK"]))');
355	12					48	$p->add_rule ('offset', 'p_and(token_silent(["LPAREN"]), \&word, token_silent(["RPAREN"]))');
356
357							$p->action ('output', sub {
358	320			320		728	my ($parse_result, $parser) = @_;
359	320					2443	my $list = cdr $parse_result;
360	320					1557	my @pieces = ();
361	320					947	foreach (@$list) {
362	339					1775	my $t = cdr $_;
363	339					1132	my $tag = cdr car $t;
364	339					1033	my $rest = cdr $t;
365	339	100				876	if (defined $rest) {
366	17					37	my ($type, $spec) = @$rest;
367	17	100				77	if ($type eq 'name') {
		100
		100
		50
368	14					29	my @names = map { cdr $_ } @$spec;
	15					43
369	14					80	push @pieces, [$tag, @names];
370							} elsif ($type eq 'attribute') {
371	1					5	push @pieces, [$tag, ['a', cdr car $spec, cdr cdr $spec]];
372							} elsif ($type eq 'match') {
373	1					6	push @pieces, [$tag, ['m', cdr car $spec, cdr cdr $spec]];
374							} elsif ($type eq 'offset') {
375	1					6	push @pieces, [$tag, ['o', cdr car $spec]];
376							}
377							} else {
378	322					1235	push @pieces, $tag;
379							}
380							}
381	320					2557	return \@pieces;
382	12					114	});
383
384	12					46	$p->build();
385	12					49	return $p;
386							}
387							=head1 AUTHOR
388
389							Michael Roberts, C<< >>
390
391							=head1 BUGS
392
393							Please report any bugs or feature requests to C, or through
394							the web interface at L. I will be notified, and then you'll
395							automatically be notified of progress on your bug as I make changes.
396
397							=head1 LICENSE AND COPYRIGHT
398
399							Copyright 2011 Michael Roberts.
400
401							This program is free software; you can redistribute it and/or modify it
402							under the terms of either: the GNU General Public License as published
403							by the Free Software Foundation; or the Artistic License.
404
405							See http://dev.perl.org/licenses/ for more information.
406
407							=cut
408
409							1; # End of Decl::DefaultParsers