File Coverage

blib/lib/Parser/Combinators.pm

Criterion	Covered	Total	%
statement	275	392	70.1
branch	116	214	54.2
condition	2	3	66.6
subroutine	36	53	67.9
pod	0	28	0.0
total	429	690	62.1

line	stmt	bran	cond	sub	pod	time	code
1							package Parser::Combinators;
2
3	2			2		72851	use strict;
	2					5
	2					64
4	2			2		54	use 5.008_005;
	2					6
	2					94
5							our $VERSION = '0.03';
6
7	2			2		10	use Exporter 'import';
	2					9
	2					133
8
9							@Parser::Combinators::EXPORT = qw(
10							sequence
11							choice
12							try
13							maybe
14							regex
15							parens
16							char
17							sepBy
18							oneOf
19							word
20							natural
21							symbol
22							greedyUpto
23							upto
24							many
25							many1
26							whiteSpace
27							comma
28							matches
29							unwrap
30							empty
31							getParseTree
32							bindP
33							returnP
34							$V
35							);
36
37	2			2		2302	use Data::Dumper;
	2					32937
	2					14626
38							$Data::Dumper::Indent=0;
39							$Data::Dumper::Terse =1;
40							our $V= 0;
41
42							# I want to write the parser using lists, because sequencing is the most common operation.
43							# So I need a function to generate the actual parser from the lists
44							# which is actually a sequence of parsers
45							# The first arg is a list ref, the second arg is an optional code ref to process the returned list of matches
46							sub sequence {
47	7			7	0	13	(my $plst, my $proc)=@_;
48							my $gen = sub {
49	17			17		2563	(my $str)=@_;
50	17	50				1601	print "* sequence($str)\n" if $V;
51	17					41	my $matches=[];
52	17					26	my $st=1;
53	17					23	my $str2='';
54	17					27	my $ms=undef;
55	17					19	for my $p (@{$plst}) {
	17					37
56	45	100				132	if (ref($p) eq 'CODE') {
		50
57	31					72	($st, $str, $ms)=$p->($str);
58							} elsif (ref($p) eq 'HASH') {
59	14					18	my %hp=%{$p};
	14					54
60	14					31	(my $k, my $pp) = each %hp;
61	14					31	($st, $str, my $mms)=$pp->($str);
62	14					57	$ms = {$k => $mms};
63							} else { # assuming it's ARRAY
64	0					0	my $p2 = sequence($p);
65	0					0	($st, $str, $ms)=$p2->($str)
66							}
67	45	100				121	if (!$st) {
68	2					8	return (0,$str,[]);
69							}
70	43					52	push @{$matches},$ms;# if scalar @{ $ms };
	43					123
71							}
72	15	50				35	if (defined($proc)) {
73	0	0				0	if( ref($proc) eq 'CODE') {
74	0					0	return (1,$str,$proc->($matches));
75							} else {
76	0	0				0	print 'TROUBLE: <',Dumper($plst),'><',Dumper($proc),'>' if $V;
77	0					0	return (1,$str,$matches);
78							}
79							} else {
80	15					52	return (1,$str,$matches)
81							}
82	7					60	};
83	7					33	return $gen;
84							}
85
86							# In the best tradition, bind() and return()
87							sub bindP {
88	0			0	0	0	(my $p1,my $p2) =@_;
89	0			0		0	my $gen = sub {(my $str1) =@_;
90	0	0				0	print "* bindP( \'$str1\' )\n" if $V;
91	0					0	my $matches=undef;
92	0					0	(my $st1,my $str2,my $m1) = $p1->( $str1 );
93	0					0	push @{$matches},$m1;
	0					0
94	0	0				0	if ($st1) {
95	0					0	print "bind: p1( $str1 ) matched,[$m1] try p2( $str2 )\n";
96	0					0	(my $st2,my $str3, my $m2) = $p2->( $str2 );
97	0	0				0	if(ref($m2) eq 'ARRAY' ){
		0
98	0					0	$matches =[ @{$matches},@{$m2} ];
	0					0
	0					0
99							} elsif (defined $m2) {
100	0					0	push @{$matches},$m2;
	0					0
101							}
102	0					0	return ($st2,$str3,$matches);
103							} else {
104	0					0	return (0,$str1,undef);
105							}
106	0					0	};
107	0					0	return $gen;
108							}
109
110							# Only we can't call it 'return' so let's call it enter :-)
111							sub returnP {
112							sub {
113	0			0		0	(my $str) = @_;
114	0					0	return (0,$str,undef);
115							}
116	0			0	0	0	}
117
118							# Choice: try every parser in the list until one succeeds or return fail. '<\|>' in Parsec
119							sub choice {
120	1			1	0	4	my @parsers=@_;
121	2			2		5	my $gen = sub { (my $str)= @_;
122	2	50				122	print "* choice('$str')\n" if $V;
123	2					7	for my $p (@parsers) {
124	3					5	my $status=0; my $matches=[];
	3					6
125	3	100				14	if (ref($p) eq 'CODE') {
		50
126	1					4	($status, $str, $matches)=$p->($str);
127							} elsif (ref($p) eq 'HASH') {
128	2					3	my %hp = %{$p};
	2					8
129	2					6	(my $k, my $pp) = each %hp;
130	2					12	($status, $str, my $mms)=$pp->($str);
131	2					9	$matches = {$k => $mms};
132							} else {
133	0					0	die Dumper($p);
134							}
135	3	100				11	if ($status) {
136	2	50				115	print "choice: remainder => <$str>\n" if $V;
137	2	50				14	print "choice: matches => [".Dumper($matches)."]\n" if $V;
138	2					352	return ($status, $str, $matches);
139							}
140							}
141	0					0	return (0, $str, []);
142	1					13	};
143	1					8	return $gen;
144							}
145							# Normally, when a parser parses a string, it removes the portion that matched. If you want to keep the string, wrap the parser in try()
146							sub try {
147	0			0	0	0	(my $p)=@_;
148							my $gen = sub {
149	0			0		0	(my $str)=@_;
150	0	0				0	print "* try( '$str' )\n" if $V;
151	0					0	(my $status, my $rest, my $matches)=$p->($str);
152	0	0				0	if ($status) {
153	0	0				0	print "try: remainder => <$rest>\n" if $V;
154	0	0				0	print "try: matches => [".Dumper($matches)."]\n" if $V;
155	0					0	return (1, $rest, $matches);
156							} else {
157	0	0				0	print "try: match failed => <$str>\n" if $V;
158	0					0	return (0, $str, $matches);
159							}
160	0					0	};
161	0					0	return $gen;
162							}
163
164							# maybe() is like try() but always succeeds
165							# it returns the matches and the consumed string or the orig string and no matches
166							sub maybe {
167	3			3	0	6	(my $p)=@_;
168							my $gen = sub {
169	9			9		16	(my $str)=@_;
170	9	50				622	print "* maybe('$str')\n" if $V;
171	9					27	(my $status, my $rest, my $matches)=$p->($str);
172	9	100				22	if ($status) {
173	6	50				56	print "maybe matches: [".Dumper($matches)."]\n" if $V;
174	6					738	return (1, $rest, $matches);
175							} else {
176	3	50				126	print "maybe: no matches for <$str>\n" if $V;
177	3					12	return (1, $str, undef);
178							}
179	3					13	};
180	3					13	return $gen;
181							}
182
183							# Enough rope: this parser will parse whatever the regex is, stripping trailing whitespace
184							sub regex {
185	1			1	0	2	(my $regex_str) = @_;
186							my $gen = sub {
187	6			6		9	(my $str)=@_;
188	6	50				455	print "* regex( '/$regex_str/', '$str' )\n" if $V;
189	6					10	my $matches=undef;
190	6	50				65	if(
191							$str=~s/($regex_str)\s*//
192							) {
193	6					17	my $m=$1;
194	6					8	$matches=$m;
195	6	50				372	print "regex: remainder => <$str>\n" if $V;
196	6	50				359	print "regex: matches => [$matches]\n" if $V;
197	6					33	return (1,$str, $matches);
198							} else {
199	0	0				0	print "regex: match failed => <$str>\n" if $V;
200							}
201	0					0	return (0,$str, $matches); # assumes $status is 0\|1, $str is string, $matches is [string]
202	1					5	};
203	1					7	return $gen;
204							}
205
206							sub parens {
207	3			3	0	7	(my $ref)= @_;
208	7			7		11	my $gen = sub { (my $str)=@_;
209	7	50				594	print "* parens($str)\n" if $V;
210	7					13	my $matches=undef;
211	7					19	(my $status, my $str, my $ch)=char('(')->($str);
212	7	100				63	if ($status==1) {
213	6					30	$str=~s/\s*//;
214	6					20	(my $st,$str,$matches)=$ref->($str);
215	6	50				377	print "parens: remainder => <$str>\n" if $V;
216	6	50				46	print "parens: matches => [".Dumper($matches)."]\n" if $V;
217	6					935	$status*=$st;
218	6	50				19	if ($status==1) {
219	6					14	(my $st, $str, my $ch)=char(')')->($str);
220	6	50				49	if ($st==1) {
221	6					27	$str=~s/\s*//;
222							}
223	6					9	$status*=$st;
224	6	50				14	if ($status==1) { # OK!
225	6	50				24	print "parens: matches => ".Dumper($matches)."\n" if $V;
226	6					690	return (1,$str, $matches);
227							} else { # parse failed on closing paren
228	0					0	return (0,$str, $matches);
229							}
230							} else { # parse failed on $ref
231	0					0	return (0,$str, $matches);
232							}
233							} else { # parse failed on opening paren
234	1					4	return (0,$str,undef);
235							}
236	3					18	};
237	3					30	return $gen;
238							}
239
240							sub char {
241	27			27	0	1154	(my $ch)=@_;
242							my $gen = sub {
243	27			27		52	(my $str)=@_;
244	27	100				1669	print "* char('$ch', '$str')\n" if $V;
245	27	100				94	if (substr($str,0,1) eq $ch) {
246	19	100				1288	print "char: matched \'$ch\' \n" if $V;
247	19	100				1276	print "char: remainder <".substr($str,1).">\n" if $V;
248	19					90	return (1,substr($str,1),$ch);
249							} else {
250	8					28	return (0,$str,undef);
251							}
252	27					162	};
253	27					81	return $gen;
254							}
255
256							sub sepBy {
257	2			2	0	3	(my $sep, my $ref)=@_;
258	6			6		11	my $gen = sub { (my $str)=@_;
259	6	50				380	print "* sepBy('$sep', '$str')\n" if $V;
260	6					13	my $matches=[];
261	6					23	(my $status,$str,my $m)=$ref->($str);
262	6	50				19	if ($status) {
263	6					9	push @{$matches},$m;
	6					11
264	6	50				234	print "sepBy: remainder => <$str>\n" if $V;
265	6					10	while( do {($status,$str,$m)=char($sep)->($str);
	12					28
266	12	100				83	if ($status) {$str=~s/\s*//;};
	6					26
267	12					36	$status==1} ) {
268	6					13	(my $st,$str,$m)=$ref->($str);
269	6					52	push @{$matches},$m;
	6					14
270							}
271	6	50				35	print "sepBy matches => [".Dumper($matches)."]\n" if $V;
272							} else { # first match failed.
273	0					0	return (0,$str,undef);
274							}
275	6					670	return (1, $str, $matches);
276	2					10	};
277	2					9	return $gen;
278							}
279							# This is a lexeme parser, so it skips trailing whitespace
280							sub word {
281							my $gen = sub {
282	13			13		29	(my $str)=@_;
283	13	100				695	print "* word( '$str' )\n" if $V;
284	13					22	my $status=0;
285	13	100				55	if(
286							$str=~/^(\w+)/
287							) {
288	12					30	my $m=$1;
289	12					13	my $matches=$m;
290	12					18	$status=1;
291	12					192	$str=~s/^$m\s*//;
292	12	100				716	print "word: remainder => <$str>\n" if $V;
293	12	100				687	print "word: matches => [$matches]\n" if $V;
294	12					58	return ($status,$str, $matches);
295							} else {
296	1	50				4	print "word: match failed => <$str>\n" if $V;
297	1					4	return ($status,$str, undef); # assumes $status is 0\|1, $str is string, $matches is [string]
298							}
299	6			6	0	1797	};
300	6					26	return $gen;
301							}
302							# matches an unsigned integer
303							sub natural {
304							my $gen = sub {
305	6			6		23	(my $str)=@_;
306	6	100				240	print "* natural( '$str' )\n" if $V;
307	6					11	my $status=0;
308	6	100				31	if(
309							$str=~/^(\d+)/
310							) {
311	4					11	my $m=$1;
312	4					8	my $matches=$m;
313	4					8	$status=1;
314	4					40	$str=~s/^$m\s*//;
315	4	100				130	print "natural: remainder => <$str>\n" if $V;
316	4	100				121	print "natural: matches => [$matches]\n" if $V;
317	4					20	return ($status,$str, $matches);
318							} else {
319	2	100				118	print "natural: match failed => <$str>\n" if $V;
320	2					9	return ($status,$str, undef); # assumes $status is 0\|1, $str is string, $matches is [string]
321							}
322	5			5	0	1825	};
323	5					25	return $gen;
324							}
325
326							# As in Parsec, parses a literal and removes trailing whitespace
327							sub symbol {
328	11			11	0	3088	(my $lit_str) = @_;
329	11					48	$lit_str=~s/(\W)/\\$1/g;
330							my $gen = sub {
331	15			15		68	(my $str)=@_;
332	15	100				1073	print "* symbol( '$lit_str', '$str' )\n" if $V;
333	15					26	my $status=0;
334	15	100				353	if(
335							$str=~/^\s$lit_str\s/
336							) {
337	14					32	my $m=$1;
338	14					20	my $matches=$lit_str;
339	14					19	$status=1;
340	14					256	$str=~s/^\s$lit_str\s//;
341	14	100				876	print "symbol: remainder => <$str>\n" if $V;
342	14	100				781	print "symbol: matches => [$matches]\n" if $V;
343	14					69	return ($status,$str, $matches);
344							} else {
345	1	50				4	print "symbol: match failed => <$str>\n" if $V;
346	1					5	return ($status,$str, undef);
347							}
348	11					52	};
349	11					42	return $gen;
350							}
351
352							# This parser parses anything up to the last occurence ofa given literal and trailing whitespace
353							sub greedyUpto {
354	1			1	0	455	(my $lit_str) = @_;
355	1					9	$lit_str=~s/(\W)/\\$1/g;
356							my $gen = sub {
357	1			1		5	(my $str)=@_;
358	1	50				4	print "* greedyUpto( \'$lit_str\', \'$str\' )\n" if $V;
359	1	50				33	if(
360							$str=~/^(.)\s$lit_str\s*/
361							) {
362	1					3	my $m=$1;
363	1					8	$m=~s/\s*$//;
364	1	50				5	my $matches= $m eq '' ? undef : $m;
365	1					25	$str=~s/^.$lit_str\s//;
366	1	50				5	print "greedyUpto: remainder => <$str>\n" if $V;
367	1	50				4	print "greedyUpto: matches => [$matches]\n" if $V;
368	1					7	return (1,$str, $matches);
369							} else {
370	0	0				0	print "greedyUpto: match failed => <$str>\n" if $V;
371	0					0	return (0,$str, undef);
372							}
373	1					7	};
374	1					5	return $gen;
375							}
376
377							# This parser parses anything up to the last occurence of a given literal and trailing whitespace
378							sub upto {
379	1			1	0	509	(my $lit_str )= @_;
380	1					7	$lit_str=~s/(\W)/\\$1/g;
381							my $gen = sub {
382	1			1		5	(my $str)=@_;
383	1	50				4	print "upto1 \'$lit_str\': <$str>\n" if $V;
384	1	50				24	if(
385							$str=~/^(.?)\s$lit_str\s*/
386							) {
387	1					3	my $m=$1;
388	1	50				4	my $matches= $m eq '' ? undef : $m;
389	1					22	$str=~s/^.?$lit_str\s//;
390	1	50				3	print "upto: remainder => <$str>\n" if $V;
391	1	50				11	print "upto: matches => [$matches]\n" if $V;
392	1					5	return (1,$str, $matches);
393							} else {
394	0	0				0	print "upto: match failed => <$str>\n" if $V;
395	0					0	return (0,$str, undef);
396							}
397	1					6	};
398	1					4	return $gen;
399							}
400
401
402							# `many`, as in Parsec, parses 0 or more the specified parsers
403							sub many {
404	0			0	0	0	(my $parser) = @_;
405							my $gen = sub {
406	0			0		0	(my $str)=@_;
407	0	0				0	print "* many( '$str' )\n" if $V;
408	0					0	(my $status,$str,my $m)=$parser->($str);
409	0	0				0	if ($status) {
410	0					0	my $matches = [$m];
411	0					0	while( $status==1 ) {
412	0					0	(my $st,$str,$m)=$parser->($str);
413	0					0	push @{$matches},$m;
	0					0
414							}
415	0	0				0	print "many: remainder => <$str>\n" if $V;
416	0	0				0	print "many: matches => [".Dumper($matches)."]\n" if $V;
417	0					0	return (1, $str, $matches);
418							} else { # first match failed.
419	0	0				0	print "many: first match failed => <$str>\n" if $V;
420	0					0	return (1, $str,undef);
421							}
422	0					0	};
423	0					0	return $gen;
424							}
425
426							# `many1`, as in Parsec, parses 1 or more the specified parsers
427							sub many1 {
428	0			0	0	0	(my $parser) = @_;
429							my $gen = sub {
430	0			0		0	(my $str)=@_;
431	0					0	my $matches=[];
432	0	0				0	print "* many( '$str' )\n" if $V;
433	0					0	(my $status,$str,my $m)=$parser->($str);
434	0	0				0	if ($status) {
435	0					0	push @{$matches},$m;
	0					0
436	0					0	while( $status==1 ) {
437	0					0	(my $st,$str,$m)=$parser->($str);
438	0					0	push @{$matches},$m;
	0					0
439							}
440	0	0				0	print "many: remainder => <$str>\n" if $V;
441	0	0				0	print "many: matches => [".Dumper($matches)."]\n" if $V;
442							} else { # first match failed.
443	0	0				0	print "many: first match failed => <$str>\n" if $V;
444	0					0	return (0, $str,undef);
445							}
446	0					0	return (1, $str, $matches);
447	0					0	};
448	0					0	return $gen;
449							}
450
451							sub comma {
452	7			7		16	my $gen = sub { (my $str) = @_;
453	7	100				384	print "* comma( '$str' )\n" if $V;
454	7					44	my $st = ($str=~s/^\s,\s//);
455	7	100				19	if ($st) {
456	5	100				369	print "comma: match\n" if $V;
457							} else {
458	2	50				10	print "comma: match failed\n" if $V;
459							}
460	7					28	return ($st, $str, undef);
461	3			3	0	512	};
462	3					14	return $gen;
463							}
464
465							sub semi {
466	0			0		0	my $gen = sub { (my $str) = @_;
467	0	0				0	print "* semi( '$str' )\n" if $V;
468	0					0	my $st = ($str=~s/^\s;\s//);
469	0					0	return ($st, $str, undef);
470	0			0	0	0	};
471	0					0	return $gen;
472							}
473
474
475
476							# strip leading whitespace, always success
477							sub whiteSpace {
478							my $gen = sub {
479	6			6		46	(my $str)=@_;
480	6	100				295	print "* whiteSpace( \'$str\' )\n" if $V;
481	6					32	$str=~s/^(\s*)//;
482	6					52	my $m=$1;
483	6					21	return (1,$str,$m)
484	4			4	0	1530	};
485	4					17	return $gen;
486							}
487
488							sub oneOf {
489	0			0	0	0	(my $patt_lst) = @_;
490							my $gen = sub {
491	0			0		0	(my $str)= @_;
492	0	0				0	print "* oneOf([".join('\|',@{$patt_lst})."],'$str')\n" if $V;
	0					0
493	0					0	for my $p (@{$patt_lst}) {
	0					0
494	0					0	(my $status, $str, my $matches)= symbol($p)->($str);
495	0	0				0	if ($status) {
496	0	0				0	print "choice: remainder => <$str>\n" if $V;
497	0	0				0	print "choice: matches => [".Dumper($matches)."]\n" if $V;
498	0					0	return (1, $str, $matches);
499							}
500							}
501	0					0	return (0, $str, undef);
502	0					0	};
503	0					0	return $gen;
504							}
505
506							sub matches {
507	0			0	0	0	return @{$_[0]};
	0					0
508							}
509
510							sub unwrap {
511	0			0	0	0	(my $elt_in_array)=@_;
512	0					0	my $elt = shift @{$elt_in_array};
	0					0
513	0					0	return $elt;
514							}
515
516							sub empty {
517	0			0	0	0	(my $elt_in_array)=@_;
518	0	0				0	return (@{$elt_in_array} ) ? 0 : 1;
	0					0
519							}
520
521							# This function returns labeled items in the parse tree.
522							# It is rather aggressive in removing unlabeled items
523	16			16	0	19	sub get_tree_as_lists { (my $list) = @_;
524	16					32	my $hlist=[];
525	16					18	for my $elt (@{$list}) {
	16					29
526	41	100	66			144	if (ref($elt) eq 'ARRAY' and scalar @{$elt}>0) { # non-empty list
	8	100				30
527	8					21	push @{ $hlist }, get_tree_as_lists($elt);
	8					18
528							} elsif (ref($elt) eq 'HASH') { # hash: need to process the rhs of the pair
529	15					18	(my $k, my $v) = each %{$elt};
	15					41
530	15	100				34	if (ref($v) ne 'ARRAY') { # not an array => wrap in array and redo
	9	100				19
531	6					7	push @{$hlist}, {$k => $v};
	6					28
532							} elsif (@{$v}==1) { # a single-elt array
533	2					3	push @{$hlist}, {$k => $v->[0]};
	2					10
534							} else {
535							my $pv =[
536							map {
537	16	100				59	if (ref($_) eq 'ARRAY') {
	7	100				15
		100
538	1					3	get_tree_as_lists($_)
539							} elsif ( ref($_) eq 'HASH') {
540	4					14	get_tree_as_lists([$_])
541	10					39	} elsif (defined $_) { $_ }
542	7					6	} @{$v}
543							];
544	7					19	push @{$hlist}, {$k => $pv};
	7					33
545							}
546							}
547							}
548	16	100				27	return scalar @{$hlist}==1 ? $hlist->[0] : $hlist;
	16					69
549							}
550
551	14			14	0	19	sub is_list_of_objects { (my $mlo) =$_;
552	14	100				27	if (ref($mlo) eq 'ARRAY') {
553	7					8	my @tmlo=@{$mlo};
	7					25
554	7					12	my @l=grep { ref($_) ne 'HASH' } @tmlo;
	16					43
555	7	100				51	return scalar(@l)?0:1;
556							} else {
557	7					27	return 0;
558							}
559							}
560
561	5			5	0	10	sub l2m { (my $hlist) =@_;
562	5					10	my $hmap = {};
563	5					7	my @hmap_vals = map { (my $k, my $v)=%{$_}; $v } @{$hlist};
	14					21
	14					39
	14					35
	5					9
564	5					8	my @hmap_keys = map { (my $k, my $v)=%{$_}; $k } @{$hlist};
	14					14
	14					29
	14					34
	5					10
565	5	100				10	my @hmap_rvals = map { is_list_of_objects($_) ? l2m($_) : $_ } @hmap_vals;
	14					28
566							# my @hmap_keys_vals = map { each %{$_} } @{$hlist}
567	5					12	for my $k ( @hmap_keys ) {
568	14					21	my $rv = shift @hmap_rvals;
569	14					32	$hmap->{$k}=$rv;
570							}
571							# my %{$hmap} = @hmap_keys_vals;
572	5					21	return $hmap;
573							}
574
575
576
577	3			3	0	16	sub getParseTree { (my $m) =@_;
578	3					10	return l2m(get_tree_as_lists($m));
579							}
580							# return ( (hlist.length==1) ? (head hlist) : hlist) # This just returns 'false' ...
581
582							1;
583
584							__END__