File Coverage

blib/lib/PPIx/Regexp/Node.pm

Criterion	Covered	Total	%
statement	214	234	91.4
branch	87	124	70.1
condition	3	6	50.0
subroutine	43	45	95.5
pod	22	22	100.0
total	369	431	85.6

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							PPIx::Regexp::Node - Represent a container
4
5							=head1 SYNOPSIS
6
7							use PPIx::Regexp::Dumper;
8							PPIx::Regexp::Dumper->new( 'qr{(foo)}' )->print();
9
10							=head1 INHERITANCE
11
12							C is a
13							L.
14
15							C is the parent of L,
16							L and
17							L.
18
19							=head1 DESCRIPTION
20
21							This class represents a structural element that contains other classes.
22							It is an abstract class, not instantiated by the lexer.
23
24							=head1 METHODS
25
26							This class provides the following public methods. Methods not documented
27							here are private, and unsupported in the sense that the author reserves
28							the right to change or remove them without notice.
29
30							=cut
31
32							package PPIx::Regexp::Node;
33
34	9			9		74	use strict;
	9					18
	9					244
35	9			9		41	use warnings;
	9					20
	9					230
36
37	9			9		45	use base qw{ PPIx::Regexp::Element };
	9					17
	9					5092
38
39	9			9		75	use Carp;
	9					18
	9					476
40	9			9		52	use List::Util qw{ max min };
	9					18
	9					499
41	9					967	use PPIx::Regexp::Constant qw{
42							CODE_REF
43							FALSE
44							INFINITY
45							MINIMUM_PERL
46							NODE_UNKNOWN
47							TRUE
48							@CARP_NOT
49	9			9		53	};
	9					17
50	9			9		62	use PPIx::Regexp::Util qw{ __instance __merge_perl_requirements width };
	9					15
	9					459
51	9			9		53	use Scalar::Util qw{ refaddr };
	9					27
	9					510
52
53							our $VERSION = '0.087_01';
54
55	9			9		51	use constant ELEMENT_UNKNOWN => NODE_UNKNOWN;
	9					17
	9					3724
56
57							sub __new {
58	970			970		2568	my ( $class, @children ) = @_;
59	970					2015	foreach my $elem ( @children ) {
60	2573	50				5384	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
61							}
62	970					3724	my $self = {
63							children => \@children,
64							};
65	970		33			3777	bless $self, ref $class \|\| $class;
66	970					1880	foreach my $elem ( @children ) {
67	2573					7034	$elem->_parent( $self );
68							}
69	970					3785	return $self;
70							}
71
72							=head2 child
73
74							my $kid = $node->child( 0 );
75
76							This method returns the child at the given index. The indices start from
77							zero, and negative indices are from the end of the list, so that
78							C<< $node->child( -1 ) >> returns the last child of the node.
79
80							=cut
81
82							sub child {
83	6138			6138	1	11839	my ( $self, $inx ) = @_;
84	6138	50				12615	defined $inx or $inx = 0;
85	6138					24434	return $self->{children}[$inx];
86							}
87
88							=head2 children
89
90							This method returns the children of the Node. If called in scalar
91							context it returns the number of children.
92
93							=cut
94
95							sub children {
96	2480			2480	1	4721	my ( $self ) = @_;
97	2480					3783	return @{ $self->{children} };
	2480					7330
98							}
99
100							=head2 contains
101
102							print $node->contains( $elem ) ? "yes\n" : "no\n";
103
104							This method returns true if the given element is contained in the node,
105							or false otherwise.
106
107							=cut
108
109							sub contains {
110	1			1	1	4	my ( $self, $elem ) = @_;
111	1	50				4	__instance( $elem, 'PPIx::Regexp::Element' ) or return;
112
113	1					5	my $addr = refaddr( $self );
114
115	1					3	while ( $elem = $elem->parent() ) {
116	3	100				18	$addr == refaddr( $elem ) and return 1;
117							}
118
119	0					0	return;
120							}
121
122							sub content {
123	516			516	1	3283	my ( $self ) = @_;
124	516					1403	return join( '', map{ $_->content() } $self->elements() );
	2138					4805
125							}
126
127							=head2 elements
128
129							This method returns the elements in the Node. For a
130							C proper, it is the same as C.
131
132							=cut
133
134							{
135	9			9		67	no warnings qw{ once };
	9					16
	9					14175
136							*elements = \&children; # sub slements
137							}
138
139							=head2 find
140
141							my $rslt = $node->find( 'PPIx::Regexp::Token::Literal' );
142							my $rslt = $node->find( 'Token::Literal' );
143							my $rslt = $node->find( sub {
144							return $_[1]->isa( 'PPIx::Regexp::Token::Literal' )
145							&& $_[1]->ordinal < ord(' ');
146							} );
147
148							This method finds things.
149
150							If given a string as argument, it is assumed to be a class name
151							(possibly without the leading 'PPIx::Regexp::'), and all elements of the
152							given class are found.
153
154							If given a code reference, that code reference is called once for each
155							element, and passed C<$self> and the element. The code should return
156							true to accept the element, false to reject it, and ( for subclasses of
157							C) C to prevent recursion into the node. If
158							the code throws an exception, you get nothing back from this method.
159
160							Either way, the return is a reference to the list of things found, a
161							false (but defined) value if nothing was found, or C if an error
162							occurred.
163
164							=cut
165
166							sub _find_routine {
167	1343			1343		2340	my ( $want ) = @_;
168	1343	100				3674	CODE_REF eq ref $want
169							and return $want;
170	687	100				1399	ref $want and return;
171	686	100				2977	$want =~ m/ \A PPIx::Regexp:: /smx
172							or $want = 'PPIx::Regexp::' . $want;
173							return sub {
174	6180	100		6180		11407	return __instance( $_[1], $want ) ? 1 : 0;
175	686					3591	};
176							}
177
178							sub find {
179	1336			1336	1	2769	my ( $self, $want ) = @_;
180
181	1336	100				2804	$want = _find_routine( $want ) or return;
182
183	1335					2373	my @found;
184
185							# We use a recursion to find what we want. PPI::Node uses an
186							# iteration.
187	1335					3290	foreach my $elem ( $self->elements() ) {
188	6179	100				9083	my $rslt = eval { $want->( $self, $elem ) }
	6179					9934
189							and push @found, $elem;
190	6179	50				11417	$@ and return;
191
192	6179	100				11125	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
193	651	50				1880	defined $rslt or next;
194							$rslt = $elem->find( $want )
195	651	100				1740	and push @found, @{ $rslt };
	52					145
196							}
197
198	1335	100				8571	return @found ? \@found : 0;
199
200							}
201
202							=head2 find_parents
203
204							my $rslt = $node->find_parents( sub {
205							return $_[1]->isa( 'PPIx::Regexp::Token::Operator' )
206							&& $_[1]->content() eq '\|';
207							} );
208
209							This convenience method takes the same arguments as C, but instead
210							of the found objects themselves returns their parents. No parent will
211							appear more than once in the output.
212
213							This method returns a reference to the array of parents if any were
214							found. If no parents were found the return is false but defined. If an
215							error occurred the return is C.
216
217							=cut
218
219							sub find_parents {
220	1			1	1	6	my ( $self, $want ) = @_;
221
222	1					3	my $found;
223	1	50				3	$found = $self->find( $want ) or return $found;
224
225	1					4	my %parents;
226							my @rslt;
227	1					1	foreach my $elem ( @{ $found } ) {
	1					4
228	2	50				9	my $dad = $elem->parent() or next;
229	2	100				27	$parents{ refaddr( $dad ) }++
230							or push @rslt, $dad;
231							}
232
233	1					19	return \@rslt;
234							}
235
236							=head2 find_first
237
238							This method has the same arguments as L, but returns either a
239							reference to the first element found, a false (but defined) value if no
240							elements were found, or C if an error occurred.
241
242							=cut
243
244							sub find_first {
245	7			7	1	24	my ( $self, $want ) = @_;
246
247	7	50				17	$want = _find_routine( $want ) or return;
248
249							# We use a recursion to find what we want. PPI::Node uses an
250							# iteration.
251	7					30	foreach my $elem ( $self->elements() ) {
252	16	100				20	my $rslt = eval { $want->( $self, $elem ) }
	16					32
253							and return $elem;
254	13	50				29	$@ and return;
255
256	13	100				28	__instance( $elem, 'PPIx::Regexp::Node' ) or next;
257	4	50				19	defined $rslt or next;
258
259	4	50				21	defined( $rslt = $elem->find_first( $want ) )
260							or return;
261	4	50				34	$rslt and return $rslt;
262							}
263
264	0					0	return 0;
265
266							}
267
268							=head2 first_element
269
270							This method returns the first element in the node.
271
272							=cut
273
274							sub first_element {
275	3			3	1	14	my ( $self ) = @_;
276	3					18	return $self->{children}[0];
277							}
278
279							=head2 first_token
280
281							This method returns the first token in the node. If there is none, it
282							returns nothing.
283
284							=cut
285
286							sub first_token {
287	4			4	1	12	my ( $self ) = @_;
288	4	50				41	my $elem = $self->first_element()
289							or return;
290	4					8	my $token;
291	4					26	while ( ! ( $token = $elem->first_token() ) ) {
292	0	0				0	$elem = $elem->next_element()
293							or return;
294							}
295	4					21	return $token;
296							}
297
298							=head2 last_element
299
300							This method returns the last element in the node.
301
302							=cut
303
304							sub last_element {
305	2			2	1	9	my ( $self ) = @_;
306	2					11	return $self->{children}[-1];
307							}
308
309							=head2 last_token
310
311							This method returns the last token in the node. If there is none, it
312							returns nothing.
313
314							=cut
315
316							sub last_token {
317	45			45	1	98	my ( $self ) = @_;
318	45	50				154	my $elem = $self->last_element()
319							or return;
320	45					74	my $token;
321	45					143	while ( ! ( $token = $elem->last_token() ) ) {
322	0	0				0	$elem = $elem->previous_element()
323							or return;
324							}
325	45					187	return $token;
326							}
327
328							sub location {
329	1			1	1	5	my ( $self ) = @_;
330	1	50				5	my $token = $self->first_token()
331							or return undef; ## no critic (ProhibitExplicitReturnUndef)
332	1					3	return $token->location();
333							}
334
335							=head2 is_matcher
336
337							This method returns a true value if any of the node's children does.
338							Otherwise it returns C if any of the node's children does.
339							Otherwise it returns a false (but defined) value.
340
341							=cut
342
343							sub is_matcher {
344	5			5	1	12	my ( $self ) = @_;
345	5					8	my $rslt = 0;
346	5					8	foreach my $kid ( @{ $self->{children} } ) {
	5					11
347	5	50				27	my $kid_rslt = $kid->is_matcher()
348							and return 1;
349	0	0				0	defined $kid_rslt
350							or $rslt = $kid_rslt;
351							}
352	0					0	return $rslt;
353							}
354
355							=head2 perl_version_introduced
356
357							This method returns the maximum value of C
358							returned by any of its elements. In other words, it returns the minimum
359							version of Perl under which this node is valid. If there are no
360							elements, 5.000 is returned, since that is the minimum value of Perl
361							supported by this package.
362
363							=cut
364
365							sub perl_version_introduced {
366	176			176	1	432	my ( $self ) = @_;
367	1038					2366	return max( grep { defined $_ } MINIMUM_PERL,
368							$self->{perl_version_introduced},
369	176					594	map { $_->perl_version_introduced() } $self->elements() );
	686					2432
370							}
371
372							=head2 perl_version_removed
373
374							This method returns the minimum defined value of C
375							returned by any of the node's elements. In other words, it returns the
376							lowest version of Perl in which this node is C valid. If there are
377							no elements, or if no element has a defined C,
378							C is returned.
379
380							=cut
381
382							sub perl_version_removed {
383	182			182	1	354	my ( $self ) = @_;
384	182					283	my $max;
385	182					439	foreach my $elem ( $self->elements() ) {
386	697	100				2253	if ( defined ( my $ver = $elem->perl_version_removed() ) ) {
387	15	50				42	if ( defined $max ) {
388	0	0				0	$ver < $max and $max = $ver;
389							} else {
390	15					27	$max = $ver;
391							}
392							}
393							}
394	182					521	return $max;
395							}
396
397							sub remove_insignificant {
398	0			0	1	0	my ( $self ) = @_;
399	0					0	return $self->__new( map { $_->remove_insignificant() }
	0					0
400							$self->children() );
401							}
402
403							=head2 schild
404
405							This method returns the significant child at the given index; that is,
406							C<< $node->schild(0) >> returns the first significant child,
407							C<< $node->schild(1) >> returns the second significant child, and so on.
408							Negative indices count from the end.
409
410							=cut
411
412							sub schild {
413	22			22	1	58	my ( $self, $inx ) = @_;
414	22	50				60	defined $inx or $inx = 0;
415
416	22					50	my $kids = $self->{children};
417
418	22	100				60	if ( $inx >= 0 ) {
419
420	20					37	my $loc = 0;
421
422	20					62	while ( exists $kids->[$loc] ) {
423	22	100				96	$kids->[$loc]->significant() or next;
424	21	100				64	--$inx >= 0 and next;
425	20					175	return $kids->[$loc];
426							} continue {
427	2					5	$loc++;
428							}
429
430							} else {
431
432	2					14	my $loc = -1;
433
434	2					22	while ( exists $kids->[$loc] ) {
435	5	100				19	$kids->[$loc]->significant() or next;
436	3	100				24	$inx++ < -1 and next;
437	2					17	return $kids->[$loc];
438							} continue {
439	3					8	--$loc;
440							}
441
442							}
443
444	0					0	return;
445							}
446
447							=head2 schildren
448
449							This method returns the significant children of the Node. If called in
450							scalar context it returns the number of significant children.
451
452							=cut
453
454							sub schildren {
455	4			4	1	13	my ( $self ) = @_;
456	4	50				13	if ( wantarray ) {
		0
457	4					8	return ( grep { $_->significant() } @{ $self->{children} } );
	13					36
	4					12
458							} elsif ( defined wantarray ) {
459	0					0	my $kids = 0;
460	0					0	foreach ( @{ $self->{children} } ) {
	0					0
461	0	0				0	$_->significant() and $kids++;
462							}
463	0					0	return $kids;
464							} else {
465	0					0	return;
466							}
467							}
468
469							sub scontent {
470	9			9	1	16	my ( $self ) = @_;
471							# As of the invention of this method all nodes are significant, so
472							# the following statement is pure paranoia on my part. -- TRW
473	9	50				28	$self->significant()
474							or return;
475							# This needs to be elements(), not children() or schildren() -- or
476							# selements() if that is ever invented. Not children() or
477							# schildren() because those ignore the delimiters. Not selements()
478							# (if that ever comes to pass) because scontent() has to make the
479							# significance check, so selements() would be wasted effort.
480	9					24	return join( '', map{ $_->scontent() } $self->elements() );
	75					175
481							}
482
483							sub tokens {
484	35			35	1	101	my ( $self ) = @_;
485	35					103	return ( map { $_->tokens() } $self->elements() );
	144					495
486							}
487
488							sub unescaped_content {
489	0			0	1	0	my ( $self ) = @_;
490	0					0	return join '', map { $_->unescaped_content() } $self->elements();
	0					0
491							}
492
493	9			9		91	use constant ALTERNATION => q<\|>;
	9					20
	9					8551
494
495							{
496							my $obj;
497							sub _alternation_object {
498	390	100		390		962	unless ( $obj ) {
499
500							=begin comment
501
502							# This is a pain because PPIx::Regexp::Token requires a
503							# tokenizer object.
504							require PPIx::Regexp::Tokenizer;
505							require PPIx::Regexp::Token::Operator;
506							$obj = PPIx::Regexp::Token::Operator->__new(
507							ALTERNATION,
508							tokenizer => PPIx::Regexp::Tokenizer->new( ALTERNATION ),
509							);
510
511							=end comment
512
513							=cut
514
515							# DANGER WILL ROBINSON!
516							# This is a horrible encapsulation violation, which I get
517							# away with because I am using the object as a sentinel.
518
519	2					10	$obj = bless {
520							content => ALTERNATION,
521							}, 'PPIx::Regexp::Token::Operator';
522							}
523	390					790	return $obj;
524							}
525							}
526
527							sub raw_width {
528	396			396	1	692	my ( $self ) = @_;
529	396					1056	return ( $self->__raw_width() )[ 0, 1 ];
530							}
531
532							# PRIVATE TO THIS PACKAGE.
533							# This is the machinery for raw_width(), but because the datum is needed
534							# internally it also returns the number of alternatives found.
535							sub __raw_width {
536	390			390		683	my ( $self ) = @_;
537	390					694	my ( $node_min, $node_max ) = ( INFINITY, 0 );
538	390					646	my ( $raw_min, $raw_max ) = ( 0, 0 );
539	390					553	my $alternatives = 0;
540	390					875	foreach my $elem ( $self->elements(), _alternation_object() ) {
541	1926	100	66			7360	if ( $elem->isa( 'PPIx::Regexp::Token::Operator' ) &&
542							$elem->content() eq ALTERNATION
543							) {
544	428					649	$alternatives++;
545	428	100				1490	defined $node_min
		100
546							and $node_min = defined $raw_min ?
547							min( $node_min, $raw_min ) :
548							undef;
549	428					620	$raw_min = 0;
550	428	100				1114	defined $node_max
		100
551							and $node_max = defined $raw_max ?
552							max( $node_max, $raw_max ) :
553							undef;
554	428					810	$raw_max = 0;
555							} else {
556	1498					3674	my ( $e_min, $e_max ) = $elem->width();
557	1498	100				4090	defined $raw_min
		100
558							and $raw_min = defined $e_min ? $raw_min + $e_min : undef;
559	1498	100				3827	defined $raw_max
		100
560							and $raw_max = defined $e_max ? $raw_max + $e_max : undef;
561							}
562							}
563	390					1385	return ( $node_min, $node_max, $alternatives );
564							}
565
566							# Help for nav();
567							sub __nav {
568	23			23		42	my ( $self, $child ) = @_;
569	23	50				61	refaddr( $child->parent() ) == refaddr( $self )
570							or return;
571	23	50				96	my ( $method, $inx ) = $child->__my_nav()
572							or return;
573
574	23					87	return ( $method => [ $inx ] );
575							}
576
577							sub __error {
578	3			3		17	my ( $self, $msg, %arg ) = @_;
579	3	50				27	defined $msg
580							or $msg = 'Was class ' . ref $self;
581	3					59	$self->ELEMENT_UNKNOWN()->__PPIX_ELEM__rebless( $self, error => $msg );
582	3					12	foreach my $key ( keys %arg ) {
583	2					11	$self->{$key} = $arg{$key};
584							}
585	3					9	return 1;
586							}
587
588							sub __perl_requirements {
589	23			23		44	my ( $self ) = @_;
590	23	100				58	unless ( $self->{perl_requirements} ) {
591	9					49	my @req = $self->__perl_requirements_setup();
592	9					22	foreach my $kid ( $self->children() ) {
593	19					106	push @req, $kid->__perl_requirements();
594							}
595	9					34	$self->{perl_requirements} = [ __merge_perl_requirements( @req ) ];
596							}
597	23					40	return @{ $self->{perl_requirements} };
	23					72
598							}
599
600							sub _token_order {
601	5			5		13	my ( $self ) = @_;
602	5					13	my $order = 0;
603	5					12	delete $self->{_token_order};
604	5					25	foreach my $elem ( $self->tokens() ) {
605	58					146	$self->{_token_order}{ refaddr $elem } = $order++;
606							}
607	5					26	return;
608							}
609
610							# Order two elements according to the position of their last tokens. The
611							# elements must both be descendants of the invocant or an exception is
612							# thrown. The return is equivalent to the space ship operator (<=>).
613							#
614							# For the moment at least this is private to the PPIx-Regexp package.
615							# It is needed by the width() functionality to (try to) determine which
616							# capture group a back reference refers to.
617							sub __token_post_order {
618	42			42		112	my ( $self, $left, $right ) = @_;
619							$self->{_token_order}
620	42	100				128	or $self->_token_order();
621	42					76	my @order;
622	42					98	foreach ( $left, $right ) {
623	84	50				175	ref $_
624							or confess 'Bug - Operand must be a PPIx::Regexp::Element';
625	84	50				198	defined( my $inx = $self->{_token_order}{ refaddr( $_->last_token() ) } )
626							or confess 'Bug - Operand not descendant of invocant';
627	84					172	push @order, $inx;
628							}
629	42					146	return $order[0] <=> $order[1];
630							}
631
632							# Called by the lexer once it has done its worst to all the tokens.
633							# Called as a method with the lexer as argument. The return is the
634							# number of parse failures discovered when finalizing.
635							sub __PPIX_LEXER__finalize {
636	263			263		838	my ( $self, $lexer ) = @_;
637	263					514	my $rslt = 0;
638	263					874	foreach my $elem ( $self->elements() ) {
639	1208					3654	$rslt += $elem->__PPIX_LEXER__finalize( $lexer );
640							}
641	263					833	return $rslt;
642							}
643
644							# Called by the lexer to record the capture number.
645							sub __PPIX_LEXER__record_capture_number {
646	518			518		1273	my ( $self, $number ) = @_;
647	518					1576	foreach my $kid ( $self->children() ) {
648	1284					4174	$number = $kid->__PPIX_LEXER__record_capture_number( $number );
649							}
650	518					1862	return $number;
651							}
652
653							1;
654
655							__END__