File Coverage

blib/lib/PPIx/Regexp/Node.pm

Criterion	Covered	Total	%
statement	214	234	91.4
branch	87	124	70.1
condition	3	6	50.0
subroutine	43	45	95.5
pod	22	22	100.0
total	369	431	85.6

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							PPIx::Regexp::Node - Represent a container
4
5							=head1 SYNOPSIS
6
7							use PPIx::Regexp::Dumper;
8							PPIx::Regexp::Dumper->new( 'qr{(foo)}' )->print();
9
10							=head1 INHERITANCE
11
12							C is a
13							L.
14
15							C is the parent of L,
16							L and
17							L.
18
19							=head1 DESCRIPTION
20
21							This class represents a structural element that contains other classes.
22							It is an abstract class, not instantiated by the lexer.
23
24							=head1 METHODS
25
26							This class provides the following public methods. Methods not documented
27							here are private, and unsupported in the sense that the author reserves
28							the right to change or remove them without notice.
29
30							=cut
31
32							package PPIx::Regexp::Node;
33
34	9			9		79	use strict;
	9					23
	9					269
35	9			9		60	use warnings;
	9					22
	9					249
36
37	9			9		45	use base qw{ PPIx::Regexp::Element };
	9					17
	9					5429
38
39	9			9		71	use Carp;
	9					20
	9					493
40	9			9		66	use List::Util qw{ max min };
	9					17
	9					506
41	9					1043	use PPIx::Regexp::Constant qw{
42							CODE_REF
43							FALSE
44							INFINITY
45							MINIMUM_PERL
46							NODE_UNKNOWN
47							TRUE
48							@CARP_NOT
49	9			9		54	};
	9					18
50	9			9		67	use PPIx::Regexp::Util qw{ __instance __merge_perl_requirements width };
	9					17
	9					486
51	9			9		55	use Scalar::Util qw{ refaddr };
	9					31
	9					561
52
53							our $VERSION = '0.087';
54
55	9			9		56	use constant ELEMENT_UNKNOWN => NODE_UNKNOWN;
	9					26
	9					3895
56
57							sub __new {
58	970			970		2749	my ( $class, @children ) = @_;
59	970					2087	foreach my $elem ( @children ) {
60	2573	50				5131	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
61							}
62	970					3651	my $self = {
63							children => \@children,
64							};
65	970		33			3859	bless $self, ref $class \|\| $class;
66	970					1931	foreach my $elem ( @children ) {
67	2573					8945	$elem->_parent( $self );
68							}
69	970					3614	return $self;
70							}
71
72							=head2 child
73
74							my $kid = $node->child( 0 );
75
76							This method returns the child at the given index. The indices start from
77							zero, and negative indices are from the end of the list, so that
78							C<< $node->child( -1 ) >> returns the last child of the node.
79
80							=cut
81
82							sub child {
83	6138			6138	1	11360	my ( $self, $inx ) = @_;
84	6138	50				12081	defined $inx or $inx = 0;
85	6138					24960	return $self->{children}[$inx];
86							}
87
88							=head2 children
89
90							This method returns the children of the Node. If called in scalar
91							context it returns the number of children.
92
93							=cut
94
95							sub children {
96	2480			2480	1	4620	my ( $self ) = @_;
97	2480					3606	return @{ $self->{children} };
	2480					7074
98							}
99
100							=head2 contains
101
102							print $node->contains( $elem ) ? "yes\n" : "no\n";
103
104							This method returns true if the given element is contained in the node,
105							or false otherwise.
106
107							=cut
108
109							sub contains {
110	1			1	1	6	my ( $self, $elem ) = @_;
111	1	50				4	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
112
113	1					17	my $addr = refaddr( $self );
114
115	1					9	while ( $elem = $elem->parent() ) {
116	3	100				20	$addr == refaddr( $elem ) and return 1;
117							}
118
119	0					0	return;
120							}
121
122							sub content {
123	516			516	1	3634	my ( $self ) = @_;
124	516					1429	return join( '', map{ $_->content() } $self->elements() );
	2138					4855
125							}
126
127							=head2 elements
128
129							This method returns the elements in the Node. For a
130							C proper, it is the same as C.
131
132							=cut
133
134							{
135	9			9		73	no warnings qw{ once };
	9					18
	9					15259
136							*elements = \&children; # sub slements
137							}
138
139							=head2 find
140
141							my $rslt = $node->find( 'PPIx::Regexp::Token::Literal' );
142							my $rslt = $node->find( 'Token::Literal' );
143							my $rslt = $node->find( sub {
144							return $_[1]->isa( 'PPIx::Regexp::Token::Literal' )
145							&& $_[1]->ordinal < ord(' ');
146							} );
147
148							This method finds things.
149
150							If given a string as argument, it is assumed to be a class name
151							(possibly without the leading 'PPIx::Regexp::'), and all elements of the
152							given class are found.
153
154							If given a code reference, that code reference is called once for each
155							element, and passed C<$self> and the element. The code should return
156							true to accept the element, false to reject it, and ( for subclasses of
157							C) C to prevent recursion into the node. If
158							the code throws an exception, you get nothing back from this method.
159
160							Either way, the return is a reference to the list of things found, a
161							false (but defined) value if nothing was found, or C if an error
162							occurred.
163
164							=cut
165
166							sub _find_routine {
167	1343			1343		2347	my ( $want ) = @_;
168	1343	100				3775	CODE_REF eq ref $want
169							and return $want;
170	687	100				1478	ref $want and return;
171	686	100				3022	$want =~ m/ \A PPIx::Regexp:: /smx
172							or $want = 'PPIx::Regexp::' . $want;
173							return sub {
174	6180	100		6180		11175	return __instance( $_[1], $want ) ? 1 : 0;
175	686					3658	};
176							}
177
178							sub find {
179	1336			1336	1	2677	my ( $self, $want ) = @_;
180
181	1336	100				2515	$want = _find_routine( $want ) or return;
182
183	1335					2390	my @found;
184
185							# We use a recursion to find what we want. PPI::Node uses an
186							# iteration.
187	1335					3198	foreach my $elem ( $self->elements() ) {
188	6179	100				8934	my $rslt = eval { $want->( $self, $elem ) }
	6179					9621
189							and push @found, $elem;
190	6179	50				11485	$@ and return;
191
192	6179	100				10495	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
193	651	50				1903	defined $rslt or next;
194							$rslt = $elem->find( $want )
195	651	100				1939	and push @found, @{ $rslt };
	52					162
196							}
197
198	1335	100				9076	return @found ? \@found : 0;
199
200							}
201
202							=head2 find_parents
203
204							my $rslt = $node->find_parents( sub {
205							return $_[1]->isa( 'PPIx::Regexp::Token::Operator' )
206							&& $_[1]->content() eq '\|';
207							} );
208
209							This convenience method takes the same arguments as C, but instead
210							of the found objects themselves returns their parents. No parent will
211							appear more than once in the output.
212
213							This method returns a reference to the array of parents if any were
214							found. If no parents were found the return is false but defined. If an
215							error occurred the return is C.
216
217							=cut
218
219							sub find_parents {
220	1			1	1	3	my ( $self, $want ) = @_;
221
222	1					2	my $found;
223	1	50				4	$found = $self->find( $want ) or return $found;
224
225	1					4	my %parents;
226							my @rslt;
227	1					2	foreach my $elem ( @{ $found } ) {
	1					4
228	2	50				8	my $dad = $elem->parent() or next;
229	2	100				15	$parents{ refaddr( $dad ) }++
230							or push @rslt, $dad;
231							}
232
233	1					7	return \@rslt;
234							}
235
236							=head2 find_first
237
238							This method has the same arguments as L, but returns either a
239							reference to the first element found, a false (but defined) value if no
240							elements were found, or C if an error occurred.
241
242							=cut
243
244							sub find_first {
245	7			7	1	26	my ( $self, $want ) = @_;
246
247	7	50				17	$want = _find_routine( $want ) or return;
248
249							# We use a recursion to find what we want. PPI::Node uses an
250							# iteration.
251	7					26	foreach my $elem ( $self->elements() ) {
252	16	100				29	my $rslt = eval { $want->( $self, $elem ) }
	16					29
253							and return $elem;
254	13	50				27	$@ and return;
255
256	13	100				27	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
257	4	50				14	defined $rslt or next;
258
259	4	50				18	defined( $rslt = $elem->find_first( $want ) )
260							or return;
261	4	50				29	$rslt and return $rslt;
262							}
263
264	0					0	return 0;
265
266							}
267
268							=head2 first_element
269
270							This method returns the first element in the node.
271
272							=cut
273
274							sub first_element {
275	3			3	1	16	my ( $self ) = @_;
276	3					20	return $self->{children}[0];
277							}
278
279							=head2 first_token
280
281							This method returns the first token in the node. If there is none, it
282							returns nothing.
283
284							=cut
285
286							sub first_token {
287	4			4	1	12	my ( $self ) = @_;
288	4	50				39	my $elem = $self->first_element()
289							or return;
290	4					9	my $token;
291	4					31	while ( ! ( $token = $elem->first_token() ) ) {
292	0	0				0	$elem = $elem->next_element()
293							or return;
294							}
295	4					22	return $token;
296							}
297
298							=head2 last_element
299
300							This method returns the last element in the node.
301
302							=cut
303
304							sub last_element {
305	2			2	1	10	my ( $self ) = @_;
306	2					13	return $self->{children}[-1];
307							}
308
309							=head2 last_token
310
311							This method returns the last token in the node. If there is none, it
312							returns nothing.
313
314							=cut
315
316							sub last_token {
317	45			45	1	89	my ( $self ) = @_;
318	45	50				133	my $elem = $self->last_element()
319							or return;
320	45					70	my $token;
321	45					178	while ( ! ( $token = $elem->last_token() ) ) {
322	0	0				0	$elem = $elem->previous_element()
323							or return;
324							}
325	45					194	return $token;
326							}
327
328							sub location {
329	1			1	1	6	my ( $self ) = @_;
330	1	50				7	my $token = $self->first_token()
331							or return undef; ## no critic (ProhibitExplicitReturnUndef)
332	1					7	return $token->location();
333							}
334
335							=head2 is_matcher
336
337							This method returns a true value if any of the node's children does.
338							Otherwise it returns C if any of the node's children does.
339							Otherwise it returns a false (but defined) value.
340
341							=cut
342
343							sub is_matcher {
344	5			5	1	10	my ( $self ) = @_;
345	5					9	my $rslt = 0;
346	5					7	foreach my $kid ( @{ $self->{children} } ) {
	5					12
347	5	50				35	my $kid_rslt = $kid->is_matcher()
348							and return 1;
349	0	0				0	defined $kid_rslt
350							or $rslt = $kid_rslt;
351							}
352	0					0	return $rslt;
353							}
354
355							=head2 perl_version_introduced
356
357							This method returns the maximum value of C
358							returned by any of its elements. In other words, it returns the minimum
359							version of Perl under which this node is valid. If there are no
360							elements, 5.000 is returned, since that is the minimum value of Perl
361							supported by this package.
362
363							=cut
364
365							sub perl_version_introduced {
366	176			176	1	435	my ( $self ) = @_;
367	1038					2325	return max( grep { defined $_ } MINIMUM_PERL,
368							$self->{perl_version_introduced},
369	176					589	map { $_->perl_version_introduced() } $self->elements() );
	686					2572
370							}
371
372							=head2 perl_version_removed
373
374							This method returns the minimum defined value of C
375							returned by any of the node's elements. In other words, it returns the
376							lowest version of Perl in which this node is C valid. If there are
377							no elements, or if no element has a defined C,
378							C is returned.
379
380							=cut
381
382							sub perl_version_removed {
383	182			182	1	369	my ( $self ) = @_;
384	182					286	my $max;
385	182					537	foreach my $elem ( $self->elements() ) {
386	697	100				2255	if ( defined ( my $ver = $elem->perl_version_removed() ) ) {
387	15	50				32	if ( defined $max ) {
388	0	0				0	$ver < $max and $max = $ver;
389							} else {
390	15					29	$max = $ver;
391							}
392							}
393							}
394	182					500	return $max;
395							}
396
397							sub remove_insignificant {
398	0			0	1	0	my ( $self ) = @_;
399	0					0	return $self->__new( map { $_->remove_insignificant() }
	0					0
400							$self->children() );
401							}
402
403							=head2 schild
404
405							This method returns the significant child at the given index; that is,
406							C<< $node->schild(0) >> returns the first significant child,
407							C<< $node->schild(1) >> returns the second significant child, and so on.
408							Negative indices count from the end.
409
410							=cut
411
412							sub schild {
413	22			22	1	55	my ( $self, $inx ) = @_;
414	22	50				83	defined $inx or $inx = 0;
415
416	22					50	my $kids = $self->{children};
417
418	22	100				58	if ( $inx >= 0 ) {
419
420	20					36	my $loc = 0;
421
422	20					63	while ( exists $kids->[$loc] ) {
423	22	100				70	$kids->[$loc]->significant() or next;
424	21	100				68	--$inx >= 0 and next;
425	20					175	return $kids->[$loc];
426							} continue {
427	2					5	$loc++;
428							}
429
430							} else {
431
432	2					7	my $loc = -1;
433
434	2					16	while ( exists $kids->[$loc] ) {
435	5	100				15	$kids->[$loc]->significant() or next;
436	3	100				14	$inx++ < -1 and next;
437	2					11	return $kids->[$loc];
438							} continue {
439	3					9	--$loc;
440							}
441
442							}
443
444	0					0	return;
445							}
446
447							=head2 schildren
448
449							This method returns the significant children of the Node. If called in
450							scalar context it returns the number of significant children.
451
452							=cut
453
454							sub schildren {
455	4			4	1	15	my ( $self ) = @_;
456	4	50				16	if ( wantarray ) {
		0
457	4					7	return ( grep { $_->significant() } @{ $self->{children} } );
	13					35
	4					15
458							} elsif ( defined wantarray ) {
459	0					0	my $kids = 0;
460	0					0	foreach ( @{ $self->{children} } ) {
	0					0
461	0	0				0	$_->significant() and $kids++;
462							}
463	0					0	return $kids;
464							} else {
465	0					0	return;
466							}
467							}
468
469							sub scontent {
470	9			9	1	17	my ( $self ) = @_;
471							# As of the invention of this method all nodes are significant, so
472							# the following statement is pure paranoia on my part. -- TRW
473	9	50				32	$self->significant()
474							or return;
475							# This needs to be elements(), not children() or schildren() -- or
476							# selements() if that is ever invented. Not children() or
477							# schildren() because those ignore the delimiters. Not selements()
478							# (if that ever comes to pass) because scontent() has to make the
479							# significance check, so selements() would be wasted effort.
480	9					25	return join( '', map{ $_->scontent() } $self->elements() );
	75					176
481							}
482
483							sub tokens {
484	35			35	1	107	my ( $self ) = @_;
485	35					86	return ( map { $_->tokens() } $self->elements() );
	144					490
486							}
487
488							sub unescaped_content {
489	0			0	1	0	my ( $self ) = @_;
490	0					0	return join '', map { $_->unescaped_content() } $self->elements();
	0					0
491							}
492
493	9			9		74	use constant ALTERNATION => q<\|>;
	9					17
	9					9023
494
495							{
496							my $obj;
497							sub _alternation_object {
498	390	100		390		917	unless ( $obj ) {
499
500							=begin comment
501
502							# This is a pain because PPIx::Regexp::Token requires a
503							# tokenizer object.
504							require PPIx::Regexp::Tokenizer;
505							require PPIx::Regexp::Token::Operator;
506							$obj = PPIx::Regexp::Token::Operator->__new(
507							ALTERNATION,
508							tokenizer => PPIx::Regexp::Tokenizer->new( ALTERNATION ),
509							);
510
511							=end comment
512
513							=cut
514
515							# DANGER WILL ROBINSON!
516							# This is a horrible encapsulation violation, which I get
517							# away with because I am using the object as a sentinel.
518
519	2					9	$obj = bless {
520							content => ALTERNATION,
521							}, 'PPIx::Regexp::Token::Operator';
522							}
523	390					763	return $obj;
524							}
525							}
526
527							sub raw_width {
528	396			396	1	711	my ( $self ) = @_;
529	396					1106	return ( $self->__raw_width() )[ 0, 1 ];
530							}
531
532							# PRIVATE TO THIS PACKAGE.
533							# This is the machinery for raw_width(), but because the datum is needed
534							# internally it also returns the number of alternatives found.
535							sub __raw_width {
536	390			390		649	my ( $self ) = @_;
537	390					722	my ( $node_min, $node_max ) = ( INFINITY, 0 );
538	390					618	my ( $raw_min, $raw_max ) = ( 0, 0 );
539	390					535	my $alternatives = 0;
540	390					860	foreach my $elem ( $self->elements(), _alternation_object() ) {
541	1926	100	66			7576	if ( $elem->isa( 'PPIx::Regexp::Token::Operator' ) &&
542							$elem->content() eq ALTERNATION
543							) {
544	428					653	$alternatives++;
545	428	100				1479	defined $node_min
		100
546							and $node_min = defined $raw_min ?
547							min( $node_min, $raw_min ) :
548							undef;
549	428					643	$raw_min = 0;
550	428	100				1076	defined $node_max
		100
551							and $node_max = defined $raw_max ?
552							max( $node_max, $raw_max ) :
553							undef;
554	428					814	$raw_max = 0;
555							} else {
556	1498					3668	my ( $e_min, $e_max ) = $elem->width();
557	1498	100				3899	defined $raw_min
		100
558							and $raw_min = defined $e_min ? $raw_min + $e_min : undef;
559	1498	100				3797	defined $raw_max
		100
560							and $raw_max = defined $e_max ? $raw_max + $e_max : undef;
561							}
562							}
563	390					1473	return ( $node_min, $node_max, $alternatives );
564							}
565
566							# Help for nav();
567							sub __nav {
568	23			23		47	my ( $self, $child ) = @_;
569	23	50				63	refaddr( $child->parent() ) == refaddr( $self )
570							or return;
571	23	50				91	my ( $method, $inx ) = $child->__my_nav()
572							or return;
573
574	23					85	return ( $method => [ $inx ] );
575							}
576
577							sub __error {
578	3			3		15	my ( $self, $msg, %arg ) = @_;
579	3	50				19	defined $msg
580							or $msg = 'Was class ' . ref $self;
581	3					66	$self->ELEMENT_UNKNOWN()->__PPIX_ELEM__rebless( $self, error => $msg );
582	3					9	foreach my $key ( keys %arg ) {
583	2					9	$self->{$key} = $arg{$key};
584							}
585	3					12	return 1;
586							}
587
588							sub __perl_requirements {
589	23			23		45	my ( $self ) = @_;
590	23	100				54	unless ( $self->{perl_requirements} ) {
591	9					65	my @req = $self->__perl_requirements_setup();
592	9					21	foreach my $kid ( $self->children() ) {
593	19					102	push @req, $kid->__perl_requirements();
594							}
595	9					49	$self->{perl_requirements} = [ __merge_perl_requirements( @req ) ];
596							}
597	23					40	return @{ $self->{perl_requirements} };
	23					64
598							}
599
600							sub _token_order {
601	5			5		13	my ( $self ) = @_;
602	5					13	my $order = 0;
603	5					10	delete $self->{_token_order};
604	5					26	foreach my $elem ( $self->tokens() ) {
605	58					155	$self->{_token_order}{ refaddr $elem } = $order++;
606							}
607	5					20	return;
608							}
609
610							# Order two elements according to the position of their last tokens. The
611							# elements must both be descendants of the invocant or an exception is
612							# thrown. The return is equivalent to the space ship operator (<=>).
613							#
614							# For the moment at least this is private to the PPIx-Regexp package.
615							# It is needed by the width() functionality to (try to) determine which
616							# capture group a back reference refers to.
617							sub __token_post_order {
618	42			42		94	my ( $self, $left, $right ) = @_;
619							$self->{_token_order}
620	42	100				133	or $self->_token_order();
621	42					65	my @order;
622	42					108	foreach ( $left, $right ) {
623	84	50				180	ref $_
624							or confess 'Bug - Operand must be a PPIx::Regexp::Element';
625	84	50				218	defined( my $inx = $self->{_token_order}{ refaddr( $_->last_token() ) } )
626							or confess 'Bug - Operand not descendant of invocant';
627	84					177	push @order, $inx;
628							}
629	42					154	return $order[0] <=> $order[1];
630							}
631
632							# Called by the lexer once it has done its worst to all the tokens.
633							# Called as a method with the lexer as argument. The return is the
634							# number of parse failures discovered when finalizing.
635							sub __PPIX_LEXER__finalize {
636	263			263		746	my ( $self, $lexer ) = @_;
637	263					517	my $rslt = 0;
638	263					1003	foreach my $elem ( $self->elements() ) {
639	1208					3437	$rslt += $elem->__PPIX_LEXER__finalize( $lexer );
640							}
641	263					749	return $rslt;
642							}
643
644							# Called by the lexer to record the capture number.
645							sub __PPIX_LEXER__record_capture_number {
646	518			518		1666	my ( $self, $number ) = @_;
647	518					1674	foreach my $kid ( $self->children() ) {
648	1284					4151	$number = $kid->__PPIX_LEXER__record_capture_number( $number );
649							}
650	518					1839	return $number;
651							}
652
653							1;
654
655							__END__