File Coverage

blib/lib/PPIx/Regexp/Node.pm

Criterion	Covered	Total	%
statement	214	234	91.4
branch	87	124	70.1
condition	3	6	50.0
subroutine	43	45	95.5
pod	22	22	100.0
total	369	431	85.6

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							PPIx::Regexp::Node - Represent a container
4
5							=head1 SYNOPSIS
6
7							use PPIx::Regexp::Dumper;
8							PPIx::Regexp::Dumper->new( 'qr{(foo)}' )->print();
9
10							=head1 INHERITANCE
11
12							C is a
13							L.
14
15							C is the parent of L,
16							L and
17							L.
18
19							=head1 DESCRIPTION
20
21							This class represents a structural element that contains other classes.
22							It is an abstract class, not instantiated by the lexer.
23
24							=head1 METHODS
25
26							This class provides the following public methods. Methods not documented
27							here are private, and unsupported in the sense that the author reserves
28							the right to change or remove them without notice.
29
30							=cut
31
32							package PPIx::Regexp::Node;
33
34	9			9		73	use strict;
	9					24
	9					243
35	9			9		51	use warnings;
	9					20
	9					243
36
37	9			9		44	use base qw{ PPIx::Regexp::Element };
	9					14
	9					5119
38
39	9			9		80	use Carp;
	9					17
	9					489
40	9			9		52	use List::Util qw{ max min };
	9					17
	9					496
41	9					965	use PPIx::Regexp::Constant qw{
42							CODE_REF
43							FALSE
44							INFINITY
45							MINIMUM_PERL
46							NODE_UNKNOWN
47							TRUE
48							@CARP_NOT
49	9			9		68	};
	9					22
50	9			9		63	use PPIx::Regexp::Util qw{ __instance __merge_perl_requirements width };
	9					27
	9					476
51	9			9		57	use Scalar::Util qw{ refaddr };
	9					22
	9					506
52
53							our $VERSION = '0.088';
54
55	9			9		55	use constant ELEMENT_UNKNOWN => NODE_UNKNOWN;
	9					15
	9					3464
56
57							sub __new {
58	970			970		2717	my ( $class, @children ) = @_;
59	970					2023	foreach my $elem ( @children ) {
60	2573	50				5100	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
61							}
62	970					3076	my $self = {
63							children => \@children,
64							};
65	970		33			3537	bless $self, ref $class \|\| $class;
66	970					1768	foreach my $elem ( @children ) {
67	2573					6730	$elem->_parent( $self );
68							}
69	970					3473	return $self;
70							}
71
72							=head2 child
73
74							my $kid = $node->child( 0 );
75
76							This method returns the child at the given index. The indices start from
77							zero, and negative indices are from the end of the list, so that
78							C<< $node->child( -1 ) >> returns the last child of the node.
79
80							=cut
81
82							sub child {
83	6138			6138	1	11588	my ( $self, $inx ) = @_;
84	6138	50				12607	defined $inx or $inx = 0;
85	6138					24260	return $self->{children}[$inx];
86							}
87
88							=head2 children
89
90							This method returns the children of the Node. If called in scalar
91							context it returns the number of children.
92
93							=cut
94
95							sub children {
96	2480			2480	1	4693	my ( $self ) = @_;
97	2480					3641	return @{ $self->{children} };
	2480					7183
98							}
99
100							=head2 contains
101
102							print $node->contains( $elem ) ? "yes\n" : "no\n";
103
104							This method returns true if the given element is contained in the node,
105							or false otherwise.
106
107							=cut
108
109							sub contains {
110	1			1	1	9	my ( $self, $elem ) = @_;
111	1	50				5	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
112
113	1					9	my $addr = refaddr( $self );
114
115	1					4	while ( $elem = $elem->parent() ) {
116	3	100				23	$addr == refaddr( $elem ) and return 1;
117							}
118
119	0					0	return;
120							}
121
122							sub content {
123	516			516	1	3193	my ( $self ) = @_;
124	516					1433	return join( '', map{ $_->content() } $self->elements() );
	2138					5078
125							}
126
127							=head2 elements
128
129							This method returns the elements in the Node. For a
130							C proper, it is the same as C.
131
132							=cut
133
134							{
135	9			9		67	no warnings qw{ once };
	9					18
	9					13692
136							*elements = \&children; # sub slements
137							}
138
139							=head2 find
140
141							my $rslt = $node->find( 'PPIx::Regexp::Token::Literal' );
142							my $rslt = $node->find( 'Token::Literal' );
143							my $rslt = $node->find( sub {
144							return $_[1]->isa( 'PPIx::Regexp::Token::Literal' )
145							&& $_[1]->ordinal < ord(' ');
146							} );
147
148							This method finds things.
149
150							If given a string as argument, it is assumed to be a class name
151							(possibly without the leading 'PPIx::Regexp::'), and all elements of the
152							given class are found.
153
154							If given a code reference, that code reference is called once for each
155							element, and passed C<$self> and the element. The code should return
156							true to accept the element, false to reject it, and ( for subclasses of
157							C) C to prevent recursion into the node. If
158							the code throws an exception, you get nothing back from this method.
159
160							Either way, the return is a reference to the list of things found, a
161							false (but defined) value if nothing was found, or C if an error
162							occurred.
163
164							=cut
165
166							sub _find_routine {
167	1343			1343		2384	my ( $want ) = @_;
168	1343	100				3612	CODE_REF eq ref $want
169							and return $want;
170	687	100				1396	ref $want and return;
171	686	100				2809	$want =~ m/ \A PPIx::Regexp:: /smx
172							or $want = 'PPIx::Regexp::' . $want;
173							return sub {
174	6180	100		6180		11369	return __instance( $_[1], $want ) ? 1 : 0;
175	686					3667	};
176							}
177
178							sub find {
179	1336			1336	1	2626	my ( $self, $want ) = @_;
180
181	1336	100				2536	$want = _find_routine( $want ) or return;
182
183	1335					2283	my @found;
184
185							# We use a recursion to find what we want. PPI::Node uses an
186							# iteration.
187	1335					3144	foreach my $elem ( $self->elements() ) {
188	6179	100				9006	my $rslt = eval { $want->( $self, $elem ) }
	6179					9660
189							and push @found, $elem;
190	6179	50				11411	$@ and return;
191
192	6179	100				10868	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
193	651	50				1687	defined $rslt or next;
194							$rslt = $elem->find( $want )
195	651	100				1778	and push @found, @{ $rslt };
	52					147
196							}
197
198	1335	100				8265	return @found ? \@found : 0;
199
200							}
201
202							=head2 find_parents
203
204							my $rslt = $node->find_parents( sub {
205							return $_[1]->isa( 'PPIx::Regexp::Token::Operator' )
206							&& $_[1]->content() eq '\|';
207							} );
208
209							This convenience method takes the same arguments as C, but instead
210							of the found objects themselves returns their parents. No parent will
211							appear more than once in the output.
212
213							This method returns a reference to the array of parents if any were
214							found. If no parents were found the return is false but defined. If an
215							error occurred the return is C.
216
217							=cut
218
219							sub find_parents {
220	1			1	1	5	my ( $self, $want ) = @_;
221
222	1					3	my $found;
223	1	50				4	$found = $self->find( $want ) or return $found;
224
225	1					5	my %parents;
226							my @rslt;
227	1					3	foreach my $elem ( @{ $found } ) {
	1					4
228	2	50				9	my $dad = $elem->parent() or next;
229	2	100				97	$parents{ refaddr( $dad ) }++
230							or push @rslt, $dad;
231							}
232
233	1					8	return \@rslt;
234							}
235
236							=head2 find_first
237
238							This method has the same arguments as L, but returns either a
239							reference to the first element found, a false (but defined) value if no
240							elements were found, or C if an error occurred.
241
242							=cut
243
244							sub find_first {
245	7			7	1	14	my ( $self, $want ) = @_;
246
247	7	50				16	$want = _find_routine( $want ) or return;
248
249							# We use a recursion to find what we want. PPI::Node uses an
250							# iteration.
251	7					23	foreach my $elem ( $self->elements() ) {
252	16	100				26	my $rslt = eval { $want->( $self, $elem ) }
	16					27
253							and return $elem;
254	13	50				28	$@ and return;
255
256	13	100				28	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
257	4	50				17	defined $rslt or next;
258
259	4	50				33	defined( $rslt = $elem->find_first( $want ) )
260							or return;
261	4	50				27	$rslt and return $rslt;
262							}
263
264	0					0	return 0;
265
266							}
267
268							=head2 first_element
269
270							This method returns the first element in the node.
271
272							=cut
273
274							sub first_element {
275	3			3	1	10	my ( $self ) = @_;
276	3					23	return $self->{children}[0];
277							}
278
279							=head2 first_token
280
281							This method returns the first token in the node. If there is none, it
282							returns nothing.
283
284							=cut
285
286							sub first_token {
287	4			4	1	9	my ( $self ) = @_;
288	4	50				54	my $elem = $self->first_element()
289							or return;
290	4					9	my $token;
291	4					25	while ( ! ( $token = $elem->first_token() ) ) {
292	0	0				0	$elem = $elem->next_element()
293							or return;
294							}
295	4					27	return $token;
296							}
297
298							=head2 last_element
299
300							This method returns the last element in the node.
301
302							=cut
303
304							sub last_element {
305	2			2	1	6	my ( $self ) = @_;
306	2					19	return $self->{children}[-1];
307							}
308
309							=head2 last_token
310
311							This method returns the last token in the node. If there is none, it
312							returns nothing.
313
314							=cut
315
316							sub last_token {
317	45			45	1	90	my ( $self ) = @_;
318	45	50				132	my $elem = $self->last_element()
319							or return;
320	45					65	my $token;
321	45					143	while ( ! ( $token = $elem->last_token() ) ) {
322	0	0				0	$elem = $elem->previous_element()
323							or return;
324							}
325	45					184	return $token;
326							}
327
328							sub location {
329	1			1	1	4	my ( $self ) = @_;
330	1	50				7	my $token = $self->first_token()
331							or return undef; ## no critic (ProhibitExplicitReturnUndef)
332	1					5	return $token->location();
333							}
334
335							=head2 is_matcher
336
337							This method returns a true value if any of the node's children does.
338							Otherwise it returns C if any of the node's children does.
339							Otherwise it returns a false (but defined) value.
340
341							=cut
342
343							sub is_matcher {
344	5			5	1	9	my ( $self ) = @_;
345	5					9	my $rslt = 0;
346	5					6	foreach my $kid ( @{ $self->{children} } ) {
	5					12
347	5	50				19	my $kid_rslt = $kid->is_matcher()
348							and return 1;
349	0	0				0	defined $kid_rslt
350							or $rslt = $kid_rslt;
351							}
352	0					0	return $rslt;
353							}
354
355							=head2 perl_version_introduced
356
357							This method returns the maximum value of C
358							returned by any of its elements. In other words, it returns the minimum
359							version of Perl under which this node is valid. If there are no
360							elements, 5.000 is returned, since that is the minimum value of Perl
361							supported by this package.
362
363							=cut
364
365							sub perl_version_introduced {
366	176			176	1	409	my ( $self ) = @_;
367	1038					2428	return max( grep { defined $_ } MINIMUM_PERL,
368							$self->{perl_version_introduced},
369	176					610	map { $_->perl_version_introduced() } $self->elements() );
	686					2436
370							}
371
372							=head2 perl_version_removed
373
374							This method returns the minimum defined value of C
375							returned by any of the node's elements. In other words, it returns the
376							lowest version of Perl in which this node is C valid. If there are
377							no elements, or if no element has a defined C,
378							C is returned.
379
380							=cut
381
382							sub perl_version_removed {
383	182			182	1	349	my ( $self ) = @_;
384	182					272	my $max;
385	182					430	foreach my $elem ( $self->elements() ) {
386	697	100				2145	if ( defined ( my $ver = $elem->perl_version_removed() ) ) {
387	15	50				30	if ( defined $max ) {
388	0	0				0	$ver < $max and $max = $ver;
389							} else {
390	15					27	$max = $ver;
391							}
392							}
393							}
394	182					526	return $max;
395							}
396
397							sub remove_insignificant {
398	0			0	1	0	my ( $self ) = @_;
399	0					0	return $self->__new( map { $_->remove_insignificant() }
	0					0
400							$self->children() );
401							}
402
403							=head2 schild
404
405							This method returns the significant child at the given index; that is,
406							C<< $node->schild(0) >> returns the first significant child,
407							C<< $node->schild(1) >> returns the second significant child, and so on.
408							Negative indices count from the end.
409
410							=cut
411
412							sub schild {
413	22			22	1	53	my ( $self, $inx ) = @_;
414	22	50				63	defined $inx or $inx = 0;
415
416	22					47	my $kids = $self->{children};
417
418	22	100				59	if ( $inx >= 0 ) {
419
420	20					50	my $loc = 0;
421
422	20					62	while ( exists $kids->[$loc] ) {
423	22	100				66	$kids->[$loc]->significant() or next;
424	21	100				66	--$inx >= 0 and next;
425	20					173	return $kids->[$loc];
426							} continue {
427	2					6	$loc++;
428							}
429
430							} else {
431
432	2					6	my $loc = -1;
433
434	2					20	while ( exists $kids->[$loc] ) {
435	5	100				16	$kids->[$loc]->significant() or next;
436	3	100				11	$inx++ < -1 and next;
437	2					10	return $kids->[$loc];
438							} continue {
439	3					5	--$loc;
440							}
441
442							}
443
444	0					0	return;
445							}
446
447							=head2 schildren
448
449							This method returns the significant children of the Node. If called in
450							scalar context it returns the number of significant children.
451
452							=cut
453
454							sub schildren {
455	4			4	1	11	my ( $self ) = @_;
456	4	50				13	if ( wantarray ) {
		0
457	4					8	return ( grep { $_->significant() } @{ $self->{children} } );
	13					35
	4					12
458							} elsif ( defined wantarray ) {
459	0					0	my $kids = 0;
460	0					0	foreach ( @{ $self->{children} } ) {
	0					0
461	0	0				0	$_->significant() and $kids++;
462							}
463	0					0	return $kids;
464							} else {
465	0					0	return;
466							}
467							}
468
469							sub scontent {
470	9			9	1	17	my ( $self ) = @_;
471							# As of the invention of this method all nodes are significant, so
472							# the following statement is pure paranoia on my part. -- TRW
473	9	50				31	$self->significant()
474							or return;
475							# This needs to be elements(), not children() or schildren() -- or
476							# selements() if that is ever invented. Not children() or
477							# schildren() because those ignore the delimiters. Not selements()
478							# (if that ever comes to pass) because scontent() has to make the
479							# significance check, so selements() would be wasted effort.
480	9					22	return join( '', map{ $_->scontent() } $self->elements() );
	75					175
481							}
482
483							sub tokens {
484	35			35	1	90	my ( $self ) = @_;
485	35					84	return ( map { $_->tokens() } $self->elements() );
	144					417
486							}
487
488							sub unescaped_content {
489	0			0	1	0	my ( $self ) = @_;
490	0					0	return join '', map { $_->unescaped_content() } $self->elements();
	0					0
491							}
492
493	9			9		71	use constant ALTERNATION => q<\|>;
	9					23
	9					8056
494
495							{
496							my $obj;
497							sub _alternation_object {
498	390	100		390		1008	unless ( $obj ) {
499
500							=begin comment
501
502							# This is a pain because PPIx::Regexp::Token requires a
503							# tokenizer object.
504							require PPIx::Regexp::Tokenizer;
505							require PPIx::Regexp::Token::Operator;
506							$obj = PPIx::Regexp::Token::Operator->__new(
507							ALTERNATION,
508							tokenizer => PPIx::Regexp::Tokenizer->new( ALTERNATION ),
509							);
510
511							=end comment
512
513							=cut
514
515							# DANGER WILL ROBINSON!
516							# This is a horrible encapsulation violation, which I get
517							# away with because I am using the object as a sentinel.
518
519	2					9	$obj = bless {
520							content => ALTERNATION,
521							}, 'PPIx::Regexp::Token::Operator';
522							}
523	390					797	return $obj;
524							}
525							}
526
527							sub raw_width {
528	396			396	1	661	my ( $self ) = @_;
529	396					1009	return ( $self->__raw_width() )[ 0, 1 ];
530							}
531
532							# PRIVATE TO THIS PACKAGE.
533							# This is the machinery for raw_width(), but because the datum is needed
534							# internally it also returns the number of alternatives found.
535							sub __raw_width {
536	390			390		657	my ( $self ) = @_;
537	390					672	my ( $node_min, $node_max ) = ( INFINITY, 0 );
538	390					695	my ( $raw_min, $raw_max ) = ( 0, 0 );
539	390					553	my $alternatives = 0;
540	390					834	foreach my $elem ( $self->elements(), _alternation_object() ) {
541	1926	100	66			7592	if ( $elem->isa( 'PPIx::Regexp::Token::Operator' ) &&
542							$elem->content() eq ALTERNATION
543							) {
544	428					691	$alternatives++;
545	428	100				1527	defined $node_min
		100
546							and $node_min = defined $raw_min ?
547							min( $node_min, $raw_min ) :
548							undef;
549	428					700	$raw_min = 0;
550	428	100				1206	defined $node_max
		100
551							and $node_max = defined $raw_max ?
552							max( $node_max, $raw_max ) :
553							undef;
554	428					831	$raw_max = 0;
555							} else {
556	1498					3617	my ( $e_min, $e_max ) = $elem->width();
557	1498	100				3840	defined $raw_min
		100
558							and $raw_min = defined $e_min ? $raw_min + $e_min : undef;
559	1498	100				3771	defined $raw_max
		100
560							and $raw_max = defined $e_max ? $raw_max + $e_max : undef;
561							}
562							}
563	390					1446	return ( $node_min, $node_max, $alternatives );
564							}
565
566							# Help for nav();
567							sub __nav {
568	23			23		39	my ( $self, $child ) = @_;
569	23	50				58	refaddr( $child->parent() ) == refaddr( $self )
570							or return;
571	23	50				97	my ( $method, $inx ) = $child->__my_nav()
572							or return;
573
574	23					85	return ( $method => [ $inx ] );
575							}
576
577							sub __error {
578	3			3		13	my ( $self, $msg, %arg ) = @_;
579	3	50				35	defined $msg
580							or $msg = 'Was class ' . ref $self;
581	3					85	$self->ELEMENT_UNKNOWN()->__PPIX_ELEM__rebless( $self, error => $msg );
582	3					12	foreach my $key ( keys %arg ) {
583	2					9	$self->{$key} = $arg{$key};
584							}
585	3					9	return 1;
586							}
587
588							sub __perl_requirements {
589	23			23		41	my ( $self ) = @_;
590	23	100				65	unless ( $self->{perl_requirements} ) {
591	9					50	my @req = $self->__perl_requirements_setup();
592	9					22	foreach my $kid ( $self->children() ) {
593	19					83	push @req, $kid->__perl_requirements();
594							}
595	9					35	$self->{perl_requirements} = [ __merge_perl_requirements( @req ) ];
596							}
597	23					34	return @{ $self->{perl_requirements} };
	23					63
598							}
599
600							sub _token_order {
601	5			5		15	my ( $self ) = @_;
602	5					11	my $order = 0;
603	5					8	delete $self->{_token_order};
604	5					18	foreach my $elem ( $self->tokens() ) {
605	58					137	$self->{_token_order}{ refaddr $elem } = $order++;
606							}
607	5					15	return;
608							}
609
610							# Order two elements according to the position of their last tokens. The
611							# elements must both be descendants of the invocant or an exception is
612							# thrown. The return is equivalent to the space ship operator (<=>).
613							#
614							# For the moment at least this is private to the PPIx-Regexp package.
615							# It is needed by the width() functionality to (try to) determine which
616							# capture group a back reference refers to.
617							sub __token_post_order {
618	42			42		89	my ( $self, $left, $right ) = @_;
619							$self->{_token_order}
620	42	100				126	or $self->_token_order();
621	42					70	my @order;
622	42					112	foreach ( $left, $right ) {
623	84	50				211	ref $_
624							or confess 'Bug - Operand must be a PPIx::Regexp::Element';
625	84	50				202	defined( my $inx = $self->{_token_order}{ refaddr( $_->last_token() ) } )
626							or confess 'Bug - Operand not descendant of invocant';
627	84					184	push @order, $inx;
628							}
629	42					143	return $order[0] <=> $order[1];
630							}
631
632							# Called by the lexer once it has done its worst to all the tokens.
633							# Called as a method with the lexer as argument. The return is the
634							# number of parse failures discovered when finalizing.
635							sub __PPIX_LEXER__finalize {
636	263			263		667	my ( $self, $lexer ) = @_;
637	263					474	my $rslt = 0;
638	263					916	foreach my $elem ( $self->elements() ) {
639	1208					3194	$rslt += $elem->__PPIX_LEXER__finalize( $lexer );
640							}
641	263					674	return $rslt;
642							}
643
644							# Called by the lexer to record the capture number.
645							sub __PPIX_LEXER__record_capture_number {
646	518			518		1171	my ( $self, $number ) = @_;
647	518					1442	foreach my $kid ( $self->children() ) {
648	1284					3871	$number = $kid->__PPIX_LEXER__record_capture_number( $number );
649							}
650	518					1629	return $number;
651							}
652
653							1;
654
655							__END__