File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitCaptureWithoutTest.pm

Criterion	Covered	Total	%
statement	123	127	96.8
branch	71	90	78.8
condition	45	63	71.4
subroutine	20	20	100.0
pod	4	5	80.0
total	263	305	86.2

line	stmt	bran	cond	sub	pod	time	code
1							package Perl::Critic::Policy::RegularExpressions::ProhibitCaptureWithoutTest;
2
3	40			40		28806	use 5.010001;
	40					207
4	40			40		273	use strict;
	40					142
	40					897
5	40			40		235	use warnings;
	40					119
	40					1160
6	40			40		267	use Readonly;
	40					128
	40					2228
7
8	40			40		346	use Perl::Critic::Utils qw{ :booleans :data_conversion :severities };
	40					166
	40					2103
9	40			40		8581	use parent 'Perl::Critic::Policy';
	40					164
	40					262
10
11							our $VERSION = '1.146';
12
13							#-----------------------------------------------------------------------------
14
15							Readonly::Hash my %CONDITIONAL_OPERATOR => hashify( qw{ && \|\| ? and or xor } );
16							Readonly::Hash my %UNAMBIGUOUS_CONTROL_TRANSFER => hashify(
17							qw< next last redo return > );
18
19							Readonly::Scalar my $DESC => q{Capture variable used outside conditional};
20							Readonly::Scalar my $EXPL => [ 253 ];
21
22							#-----------------------------------------------------------------------------
23
24							sub supported_parameters { return (
25							{
26	115			115	0	2328	name => 'exception_source',
27							description => 'Names of ways to generate exceptions',
28							behavior => 'string list',
29							list_always_present_values => [ qw{ die croak confess } ],
30							}
31							);
32							}
33	86			86	1	470	sub default_severity { return $SEVERITY_MEDIUM }
34	86			86	1	374	sub default_themes { return qw(core pbp maintenance certrule ) }
35	55			55	1	180	sub applies_to { return 'PPI::Token::Magic' }
36
37							#-----------------------------------------------------------------------------
38
39							sub violates {
40	76			76	1	179	my ($self, $elem, $doc) = @_;
41							# TODO named capture variables
42	76	100				224	return if $elem !~ m/\A \$[1-9] \z/xms;
43	59	100				458	return if _is_in_conditional_expression($elem);
44	51	100				175	return if $self->_is_in_conditional_structure($elem);
45	12					60	return $self->violation( $DESC, $EXPL, $elem );
46							}
47
48							sub _is_in_conditional_expression {
49	79			79		151	my $elem = shift;
50
51							# simplistic check: is there a conditional operator between a match and
52							# the capture var?
53	79					249	my $psib = $elem->sprevious_sibling;
54	79					2038	while ($psib) {
55	105	100				989	if ($psib->isa('PPI::Token::Operator')) {
56	24					60	my $op = $psib->content;
57	24	100				174	if ( $CONDITIONAL_OPERATOR{ $op } ) {
58	12					83	$psib = $psib->sprevious_sibling;
59	12					255	while ($psib) {
60	12	100				69	return 1 if ($psib->isa('PPI::Token::Regexp::Match'));
61	4	50				21	return 1 if ($psib->isa('PPI::Token::Regexp::Substitute'));
62	0					0	$psib = $psib->sprevious_sibling;
63							}
64	0					0	return; # false
65							}
66							}
67	93					299	$psib = $psib->sprevious_sibling;
68							}
69
70	67					1554	return; # false
71							}
72
73							sub _is_in_conditional_structure {
74	67			67		152	my ( $self, $elem ) = @_;
75
76	67					206	my $stmt = $elem->statement();
77	67		33			1355	while ($stmt && $elem->isa('PPI::Statement::Expression')) {
78							#return if _is_in_conditional_expression($stmt);
79	0					0	$stmt = $stmt->statement();
80							}
81	67	50				201	return if !$stmt;
82
83							# Check if any previous statements in the same scope have regexp matches
84	67					193	my $psib = $stmt->sprevious_sibling;
85	67					1592	while ($psib) {
86	43	100	66			250	if ( $psib->isa( 'PPI::Node' ) and
87							my $match = _find_exposed_match_or_substitute( $psib ) ) {
88	35		100			116	return _is_control_transfer_to_left( $self, $match, $elem ) \|\|
89							_is_control_transfer_to_right( $self, $match, $elem );
90							}
91	8					38	$psib = $psib->sprevious_sibling;
92							}
93
94							# Check for an enclosing 'if', 'unless', 'elsif', 'else', or 'when'
95	32					230	my $parent = $stmt->parent;
96	32					170	while ($parent) { # never false as long as we're inside a PPI::Document
97	43	100	100			336	if ($parent->isa('PPI::Statement::Compound') \|\|
		100
98							$parent->isa('PPI::Statement::When' )
99							) {
100	8					47	return 1;
101							}
102							elsif ($parent->isa('PPI::Structure')) {
103	20	100				116	return 1 if _is_in_conditional_expression($parent);
104	16	100				56	return 1 if $self->_is_in_conditional_structure($parent);
105	11					79	$parent = $parent->parent;
106							}
107							else {
108	15					31	last;
109							}
110							}
111
112	15					52	return; # fail
113							}
114
115							# This subroutine returns true if there is a control transfer to the left of
116							# the match operation which would bypass the capture variable. The arguments
117							# are the match operation and the capture variable.
118							sub _is_control_transfer_to_left {
119	35			35		83	my ( $self, $match, $elem ) = @_;
120							# If a regexp match is found, we succeed if a match failure
121							# appears to throw an exception, and fail otherwise. RT 36081
122	35	100				104	my $prev = $match->sprevious_sibling() or return;
123	9		100			243	while ( not ( $prev->isa( 'PPI::Token::Word' ) &&
124							q<unless> eq $prev->content() ) ) {
125	13	100				189	$prev = $prev->sprevious_sibling() or return;
126							}
127							# In this case we analyze the first thing to appear in the parent of the
128							# 'unless'. This is the simplest case, and it will not be hard to dream up
129							# cases where this is insufficient (e.g. do {something(); die} unless ...)
130	4	50				27	my $parent = $prev->parent() or return;
131	4	50				29	my $first = $parent->schild( 0 ) or return;
132	4	50				52	if ( my $method = _get_method_name( $first ) ) {
133							# Methods can also be exception sources.
134	0					0	return $self->{_exception_source}{ $method->content() };
135							}
136	4		66			14	return $self->{_exception_source}{ $first->content() } \|\|
137							_unambiguous_control_transfer( $first, $elem );
138							}
139
140							# This subroutine returns true if there is a control transfer to the right of
141							# the match operation which would bypass the capture variable. The arguments
142							# are the match operation and the capture variable.
143							sub _is_control_transfer_to_right {
144	31			31		581	my ( $self, $match, $elem ) = @_;
145							# If a regexp match is found, we succeed if a match failure
146							# appears to throw an exception, and fail otherwise. RT 36081
147	31	100				104	my $oper = $match->snext_sibling() or return; # fail
148	29					621	my $oper_content = $oper->content();
149							# We do not check 'dor' or '//' because a match failure does not
150							# return an undefined value.
151	29	100	100			191	q{or} eq $oper_content
152							or q{\|\|} eq $oper_content
153							or return; # fail
154	26	50				70	my $next = $oper->snext_sibling() or return; # fail
155	26	100				492	if ( my $method = _get_method_name( $next ) ) {
156							# Methods can also be exception sources.
157	1					4	return $self->{_exception_source}{ $method->content() };
158							}
159	25		100			73	return $self->{_exception_source}{ $next->content() } \|\|
160							_unambiguous_control_transfer( $next, $elem );
161							}
162
163							# Given a PPI::Node, find the last regexp match or substitution that is
164							# in-scope to the node's next sibling.
165							sub _find_exposed_match_or_substitute { # RT 36081
166	43			43		82	my $elem = shift;
167							FIND_REGEXP_NOT_IN_BLOCK:
168	43					97	foreach my $regexp ( reverse @{ $elem->find(
169							sub {
170	397		100	397		4927	return $_[1]->isa( 'PPI::Token::Regexp::Substitute' )
171							\|\| $_[1]->isa( 'PPI::Token::Regexp::Match' );
172							}
173	43	100				202	) \|\| [] } ) {
174	38					502	my $parent = $regexp->parent();
175	38					219	while ( $parent != $elem ) {
176	10	100				133	$parent->isa( 'PPI::Structure::Block' )
177							and next FIND_REGEXP_NOT_IN_BLOCK;
178	7	50				38	$parent = $parent->parent()
179							or next FIND_REGEXP_NOT_IN_BLOCK;
180							}
181	35					474	return $regexp;
182							}
183	8					107	return;
184							}
185
186							# If the argument introduces a method call, return the method name;
187							# otherwise just return.
188							sub _get_method_name {
189	30			30		66	my ( $elem ) = @_;
190							# We fail unless the element we were given looks like it might be an
191							# object or a class name.
192	30	50				80	$elem or return;
193							(
194	30	50	66			215	$elem->isa( 'PPI::Token::Symbol' ) &&
			33
			66
195							q<$> eq $elem->raw_type() \|\|
196							$elem->isa( 'PPI::Token::Word' ) &&
197							$elem->content() =~ m/ \A [\w:]+ \z /smx
198							) or return;
199							# We skip over all the subscripts and '->' operators to the right of
200							# the original element, failing if we run out of objects.
201	30					257	my $prior;
202	30	50				78	my $next = $elem->snext_sibling() or return;
203	30		66			779	while ( $next->isa( 'PPI::Token::Subscript' ) \|\|
			66
204							$next->isa( 'PPI::Token::Operator' ) &&
205							q{->} eq $next->content() ) {
206	1					9	$prior = $next;
207	1	50				3	$next = $next->snext_sibling or return; # fail
208							}
209							# A method call must have a '->' operator before it.
210	30	50	66			187	( $prior &&
			66
211							$prior->isa( 'PPI::Token::Operator' ) &&
212							q{->} eq $prior->content()
213							) or return;
214							# Anything other than a PPI::Token::Word can not be statically
215							# recognized as a method name.
216	1	50				11	$next->isa( 'PPI::Token::Word' ) or return;
217							# Whatever we have left at this point looks very like a method name.
218	1					5	return $next;
219							}
220
221							# Determine whether the given element represents an unambiguous transfer of
222							# control around anything that follows it in the same block. The arguments are
223							# the element to check, and the capture variable that is the subject of this
224							# call to the policy.
225							sub _unambiguous_control_transfer { # RT 36081.
226	19			19		204	my ( $xfer, $elem ) = @_;
227
228	19					41	my $content = $xfer->content();
229
230							# Anything in the hash is always a transfer of control.
231	19	100				151	return $TRUE if $UNAMBIGUOUS_CONTROL_TRANSFER{ $content };
232
233							# A goto is not unambiguous on the face of it, but at least some forms of
234							# it can be accepted.
235	8	100				87	q<goto> eq $content
236							and return _unambiguous_goto( $xfer, $elem );
237
238							# Anything left at this point is _not_ an unambiguous transfer of control
239							# around whatever follows it.
240	1					7	return;
241							}
242
243							# Determine whether the given goto represents an unambiguous transfer of
244							# control around anything that follows it in the same block. The arguments are
245							# the element to check, and the capture variable that is the subject of this
246							# call to the policy.
247							sub _unambiguous_goto {
248	7			7		16	my ( $xfer, $elem ) = @_;
249
250							# A goto without a target?
251	7	50				18	my $target = $xfer->snext_sibling() or return;
252
253							# The co-routine form of goto is an unambiguous transfer of control.
254	7	100	66			178	$target->isa( 'PPI::Token::Symbol' )
255							and q<&> eq $target->raw_type()
256							and return $TRUE;
257
258							# The label form of goto is an unambiguous transfer of control,
259							# provided the label does not occur between the goto and the capture
260							# variable.
261	6	100				25	if ( $target->isa( 'PPI::Token::Word' ) ) {
262
263							# We need to search in our most-local block, or the document if
264							# there is no enclosing block.
265	5					10	my $container = $target;
266	5					14	while ( my $parent = $container->parent() ) {
267	10					47	$container = $parent;
268	10	100				44	$container->isa( 'PPI::Structure::Block' ) and last;
269							}
270
271							# We search the container for our label. If we find it, we return
272							# true if it occurs before the goto or after the capture variable,
273							# otherwise we return false. If we do not find it we return true.
274							# Note that perl does not seem to consider duplicate labels an
275							# error, but also seems to take the first one in the relevant
276							# scope when this happens.
277	5					17	my $looking_for = qr/ \A @{[ $target->content() ]} \s* : \z /smx;
	5					14
278	5	50				77	my ($start_line, $start_char) = @{ $xfer->location() \|\| [] };
	5					19
279	5	50				86	defined $start_line or return; # document not indexed.
280	5	50				8	my ($end_line, $end_char) = @{ $elem->location() \|\| [] };
	5					26
281	5					84	foreach my $label (
282	5	100				25	@{ $container->find( 'PPI::Token::Label' ) \|\| [] } )
283							{
284	3	50				2161	$label->content() =~ m/$looking_for/smx or next;
285	3	50				26	my ( $line, $char ) = @{ $label->location() \|\| [] };
	3					12
286	3	100	66			61	return $TRUE
			66
287							if $line < $start_line \|\|
288							$line == $start_line && $char < $start_char;
289	2	100	33			25	return $TRUE
			66
290							if $line > $end_line \|\|
291							$line == $end_line && $char > $end_char;
292	1					11	return;
293							}
294	2					7276	return $TRUE;
295							}
296
297							# Any other form of goto can not be statically analyzed, and so is not
298							# an unambiguous transfer of control around the capture variable.
299	1					12	return;
300							}
301
302							1;
303
304							#-----------------------------------------------------------------------------
305
306							__END__
307
308							=pod
309
310							=head1 NAME
311
312							Perl::Critic::Policy::RegularExpressions::ProhibitCaptureWithoutTest - Capture variable used outside conditional.
313
314							=head1 AFFILIATION
315
316							This Policy is part of the core L<Perl::Critic\|Perl::Critic>
317							distribution.
318
319							=head1 DESCRIPTION
320
321							If a regexp match fails, then any capture variables (C<$1>, C<$2>,
322							...) will be unaffected. They will retain whatever old values they may
323							have had. Therefore it's important to check the return value of a match
324							before using those variables.
325
326							'12312123' =~ /(2)/;
327							print $1; # Prints 2
328							'123123123' =~ /(X)/;
329							print $1; # Prints 2, because $1 has not changed.
330
331							Note that because the values of C<$1> etc will be unaffected, you cannot
332							determine if a match succeeded by checking to see if the capture variables
333							have values.
334
335							# WRONG
336							$str =~ /foo(.+)/;
337							if ( $1 ) {
338							print "I found $1 after 'foo'";
339							}
340
341							This policy checks that the previous regexp for which the capture
342							variable is in-scope is either in a conditional or causes an exception
343							or other control transfer (i.e. C<next>, C<last>, C<redo>, C<return>, or
344							sometimes C<goto>) if the match fails.
345
346							A C<goto> is only accepted by this policy if it is a co-routine call
347							(i.e. C<goto &foo>) or a C<goto LABEL> where the label does not fall
348							between the C<goto> and the capture variable in the scope of the
349							C<goto>. A computed C<goto> (i.e. something like C<goto (qw{foo bar
350							baz})[$i]>) is not accepted by this policy because its target can not be
351							statically determined.
352
353							This policy does not check whether that conditional is actually
354							testing a regexp result, nor does it check whether a regexp actually
355							has a capture in it. Those checks are too hard.
356
357							This policy also does not check arbitrarily complex conditionals guarding
358							regexp results, for pretty much the same reason. Simple things like
359
360							m/(foo)/ or die "No foo!";
361							die "No foo!" unless m/(foo)/;
362
363							will be handled, but something like
364
365							m/(foo)/ or do {
366							... lots of complicated calculations here ...
367							die "No foo!";
368							};
369
370							are beyond its scope.
371
372
373							=head1 CONFIGURATION
374
375							By default, this policy considers C<die>, C<croak>, and C<confess> to
376							throw exceptions. If you have additional subroutines or methods that may
377							be used in lieu of one of these, you can configure them in your
378							perlcriticrc as follows:
379
380							[RegularExpressions::ProhibitCaptureWithoutTest]
381							exception_source = my_exception_generator
382
383							=head1 BUGS
384
385							This policy does not recognize named capture variables. Yet.
386
387							=head1 AUTHOR
388
389							Chris Dolan <cdolan@cpan.org>
390
391							=head1 COPYRIGHT
392
393							Copyright (c) 2006-2017 Chris Dolan.
394
395							This program is free software; you can redistribute it and/or modify
396							it under the same terms as Perl itself.
397
398							=cut
399
400							# Local Variables:
401							# mode: cperl
402							# cperl-indent-level: 4
403							# fill-column: 78
404							# indent-tabs-mode: nil
405							# c-indentation-style: bsd
406							# End:
407							# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :