File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitCaptureWithoutTest.pm
Criterion Covered Total %
statement 123 127 96.8
branch 71 90 78.8
condition 45 63 71.4
subroutine 20 20 100.0
pod 4 5 80.0
total 263 305 86.2


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::RegularExpressions::ProhibitCaptureWithoutTest;
2              
3 40     40   28806 use 5.010001;
  40         207  
4 40     40   273 use strict;
  40         142  
  40         897  
5 40     40   235 use warnings;
  40         119  
  40         1160  
6 40     40   267 use Readonly;
  40         128  
  40         2228  
7              
8 40     40   346 use Perl::Critic::Utils qw{ :booleans :data_conversion :severities };
  40         166  
  40         2103  
9 40     40   8581 use parent 'Perl::Critic::Policy';
  40         164  
  40         262  
10              
11             our $VERSION = '1.146';
12              
13             #-----------------------------------------------------------------------------
14              
15             Readonly::Hash my %CONDITIONAL_OPERATOR => hashify( qw{ && || ? and or xor } );
16             Readonly::Hash my %UNAMBIGUOUS_CONTROL_TRANSFER => hashify(
17             qw< next last redo return > );
18              
19             Readonly::Scalar my $DESC => q{Capture variable used outside conditional};
20             Readonly::Scalar my $EXPL => [ 253 ];
21              
22             #-----------------------------------------------------------------------------
23              
24             sub supported_parameters { return (
25             {
26 115     115 0 2328 name => 'exception_source',
27             description => 'Names of ways to generate exceptions',
28             behavior => 'string list',
29             list_always_present_values => [ qw{ die croak confess } ],
30             }
31             );
32             }
33 86     86 1 470 sub default_severity { return $SEVERITY_MEDIUM }
34 86     86 1 374 sub default_themes { return qw(core pbp maintenance certrule ) }
35 55     55 1 180 sub applies_to { return 'PPI::Token::Magic' }
36              
37             #-----------------------------------------------------------------------------
38              
39             sub violates {
40 76     76 1 179 my ($self, $elem, $doc) = @_;
41             # TODO named capture variables
42 76 100       224 return if $elem !~ m/\A \$[1-9] \z/xms;
43 59 100       458 return if _is_in_conditional_expression($elem);
44 51 100       175 return if $self->_is_in_conditional_structure($elem);
45 12         60 return $self->violation( $DESC, $EXPL, $elem );
46             }
47              
48             sub _is_in_conditional_expression {
49 79     79   151 my $elem = shift;
50              
51             # simplistic check: is there a conditional operator between a match and
52             # the capture var?
53 79         249 my $psib = $elem->sprevious_sibling;
54 79         2038 while ($psib) {
55 105 100       989 if ($psib->isa('PPI::Token::Operator')) {
56 24         60 my $op = $psib->content;
57 24 100       174 if ( $CONDITIONAL_OPERATOR{ $op } ) {
58 12         83 $psib = $psib->sprevious_sibling;
59 12         255 while ($psib) {
60 12 100       69 return 1 if ($psib->isa('PPI::Token::Regexp::Match'));
61 4 50       21 return 1 if ($psib->isa('PPI::Token::Regexp::Substitute'));
62 0         0 $psib = $psib->sprevious_sibling;
63             }
64 0         0 return; # false
65             }
66             }
67 93         299 $psib = $psib->sprevious_sibling;
68             }
69              
70 67         1554 return; # false
71             }
72              
73             sub _is_in_conditional_structure {
74 67     67   152 my ( $self, $elem ) = @_;
75              
76 67         206 my $stmt = $elem->statement();
77 67   33     1355 while ($stmt && $elem->isa('PPI::Statement::Expression')) {
78             #return if _is_in_conditional_expression($stmt);
79 0         0 $stmt = $stmt->statement();
80             }
81 67 50       201 return if !$stmt;
82              
83             # Check if any previous statements in the same scope have regexp matches
84 67         193 my $psib = $stmt->sprevious_sibling;
85 67         1592 while ($psib) {
86 43 100 66     250 if ( $psib->isa( 'PPI::Node' ) and
87             my $match = _find_exposed_match_or_substitute( $psib ) ) {
88 35   100     116 return _is_control_transfer_to_left( $self, $match, $elem ) ||
89             _is_control_transfer_to_right( $self, $match, $elem );
90             }
91 8         38 $psib = $psib->sprevious_sibling;
92             }
93              
94             # Check for an enclosing 'if', 'unless', 'elsif', 'else', or 'when'
95 32         230 my $parent = $stmt->parent;
96 32         170 while ($parent) { # never false as long as we're inside a PPI::Document
97 43 100 100     336 if ($parent->isa('PPI::Statement::Compound') ||
    100          
98             $parent->isa('PPI::Statement::When' )
99             ) {
100 8         47 return 1;
101             }
102             elsif ($parent->isa('PPI::Structure')) {
103 20 100       116 return 1 if _is_in_conditional_expression($parent);
104 16 100       56 return 1 if $self->_is_in_conditional_structure($parent);
105 11         79 $parent = $parent->parent;
106             }
107             else {
108 15         31 last;
109             }
110             }
111              
112 15         52 return; # fail
113             }
114              
115             # This subroutine returns true if there is a control transfer to the left of
116             # the match operation which would bypass the capture variable. The arguments
117             # are the match operation and the capture variable.
118             sub _is_control_transfer_to_left {
119 35     35   83 my ( $self, $match, $elem ) = @_;
120             # If a regexp match is found, we succeed if a match failure
121             # appears to throw an exception, and fail otherwise. RT 36081
122 35 100       104 my $prev = $match->sprevious_sibling() or return;
123 9   100     243 while ( not ( $prev->isa( 'PPI::Token::Word' ) &&
124             q<unless> eq $prev->content() ) ) {
125 13 100       189 $prev = $prev->sprevious_sibling() or return;
126             }
127             # In this case we analyze the first thing to appear in the parent of the
128             # 'unless'. This is the simplest case, and it will not be hard to dream up
129             # cases where this is insufficient (e.g. do {something(); die} unless ...)
130 4 50       27 my $parent = $prev->parent() or return;
131 4 50       29 my $first = $parent->schild( 0 ) or return;
132 4 50       52 if ( my $method = _get_method_name( $first ) ) {
133             # Methods can also be exception sources.
134 0         0 return $self->{_exception_source}{ $method->content() };
135             }
136 4   66     14 return $self->{_exception_source}{ $first->content() } ||
137             _unambiguous_control_transfer( $first, $elem );
138             }
139              
140             # This subroutine returns true if there is a control transfer to the right of
141             # the match operation which would bypass the capture variable. The arguments
142             # are the match operation and the capture variable.
143             sub _is_control_transfer_to_right {
144 31     31   581 my ( $self, $match, $elem ) = @_;
145             # If a regexp match is found, we succeed if a match failure
146             # appears to throw an exception, and fail otherwise. RT 36081
147 31 100       104 my $oper = $match->snext_sibling() or return; # fail
148 29         621 my $oper_content = $oper->content();
149             # We do not check 'dor' or '//' because a match failure does not
150             # return an undefined value.
151 29 100 100     191 q{or} eq $oper_content
152             or q{||} eq $oper_content
153             or return; # fail
154 26 50       70 my $next = $oper->snext_sibling() or return; # fail
155 26 100       492 if ( my $method = _get_method_name( $next ) ) {
156             # Methods can also be exception sources.
157 1         4 return $self->{_exception_source}{ $method->content() };
158             }
159 25   100     73 return $self->{_exception_source}{ $next->content() } ||
160             _unambiguous_control_transfer( $next, $elem );
161             }
162              
163             # Given a PPI::Node, find the last regexp match or substitution that is
164             # in-scope to the node's next sibling.
165             sub _find_exposed_match_or_substitute { # RT 36081
166 43     43   82 my $elem = shift;
167             FIND_REGEXP_NOT_IN_BLOCK:
168 43         97 foreach my $regexp ( reverse @{ $elem->find(
169             sub {
170 397   100 397   4927 return $_[1]->isa( 'PPI::Token::Regexp::Substitute' )
171             || $_[1]->isa( 'PPI::Token::Regexp::Match' );
172             }
173 43 100       202 ) || [] } ) {
174 38         502 my $parent = $regexp->parent();
175 38         219 while ( $parent != $elem ) {
176 10 100       133 $parent->isa( 'PPI::Structure::Block' )
177             and next FIND_REGEXP_NOT_IN_BLOCK;
178 7 50       38 $parent = $parent->parent()
179             or next FIND_REGEXP_NOT_IN_BLOCK;
180             }
181 35         474 return $regexp;
182             }
183 8         107 return;
184             }
185              
186             # If the argument introduces a method call, return the method name;
187             # otherwise just return.
188             sub _get_method_name {
189 30     30   66 my ( $elem ) = @_;
190             # We fail unless the element we were given looks like it might be an
191             # object or a class name.
192 30 50       80 $elem or return;
193             (
194 30 50 66     215 $elem->isa( 'PPI::Token::Symbol' ) &&
      33        
      66        
195             q<$> eq $elem->raw_type() ||
196             $elem->isa( 'PPI::Token::Word' ) &&
197             $elem->content() =~ m/ \A [\w:]+ \z /smx
198             ) or return;
199             # We skip over all the subscripts and '->' operators to the right of
200             # the original element, failing if we run out of objects.
201 30         257 my $prior;
202 30 50       78 my $next = $elem->snext_sibling() or return;
203 30   66     779 while ( $next->isa( 'PPI::Token::Subscript' ) ||
      66        
204             $next->isa( 'PPI::Token::Operator' ) &&
205             q{->} eq $next->content() ) {
206 1         9 $prior = $next;
207 1 50       3 $next = $next->snext_sibling or return; # fail
208             }
209             # A method call must have a '->' operator before it.
210 30 50 66     187 ( $prior &&
      66        
211             $prior->isa( 'PPI::Token::Operator' ) &&
212             q{->} eq $prior->content()
213             ) or return;
214             # Anything other than a PPI::Token::Word can not be statically
215             # recognized as a method name.
216 1 50       11 $next->isa( 'PPI::Token::Word' ) or return;
217             # Whatever we have left at this point looks very like a method name.
218 1         5 return $next;
219             }
220              
221             # Determine whether the given element represents an unambiguous transfer of
222             # control around anything that follows it in the same block. The arguments are
223             # the element to check, and the capture variable that is the subject of this
224             # call to the policy.
225             sub _unambiguous_control_transfer { # RT 36081.
226 19     19   204 my ( $xfer, $elem ) = @_;
227              
228 19         41 my $content = $xfer->content();
229              
230             # Anything in the hash is always a transfer of control.
231 19 100       151 return $TRUE if $UNAMBIGUOUS_CONTROL_TRANSFER{ $content };
232              
233             # A goto is not unambiguous on the face of it, but at least some forms of
234             # it can be accepted.
235 8 100       87 q<goto> eq $content
236             and return _unambiguous_goto( $xfer, $elem );
237              
238             # Anything left at this point is _not_ an unambiguous transfer of control
239             # around whatever follows it.
240 1         7 return;
241             }
242              
243             # Determine whether the given goto represents an unambiguous transfer of
244             # control around anything that follows it in the same block. The arguments are
245             # the element to check, and the capture variable that is the subject of this
246             # call to the policy.
247             sub _unambiguous_goto {
248 7     7   16 my ( $xfer, $elem ) = @_;
249              
250             # A goto without a target?
251 7 50       18 my $target = $xfer->snext_sibling() or return;
252              
253             # The co-routine form of goto is an unambiguous transfer of control.
254 7 100 66     178 $target->isa( 'PPI::Token::Symbol' )
255             and q<&> eq $target->raw_type()
256             and return $TRUE;
257              
258             # The label form of goto is an unambiguous transfer of control,
259             # provided the label does not occur between the goto and the capture
260             # variable.
261 6 100       25 if ( $target->isa( 'PPI::Token::Word' ) ) {
262              
263             # We need to search in our most-local block, or the document if
264             # there is no enclosing block.
265 5         10 my $container = $target;
266 5         14 while ( my $parent = $container->parent() ) {
267 10         47 $container = $parent;
268 10 100       44 $container->isa( 'PPI::Structure::Block' ) and last;
269             }
270              
271             # We search the container for our label. If we find it, we return
272             # true if it occurs before the goto or after the capture variable,
273             # otherwise we return false. If we do not find it we return true.
274             # Note that perl does not seem to consider duplicate labels an
275             # error, but also seems to take the first one in the relevant
276             # scope when this happens.
277 5         17 my $looking_for = qr/ \A @{[ $target->content() ]} \s* : \z /smx;
  5         14  
278 5 50       77 my ($start_line, $start_char) = @{ $xfer->location() || [] };
  5         19  
279 5 50       86 defined $start_line or return; # document not indexed.
280 5 50       8 my ($end_line, $end_char) = @{ $elem->location() || [] };
  5         26  
281 5         84 foreach my $label (
282 5 100       25 @{ $container->find( 'PPI::Token::Label' ) || [] } )
283             {
284 3 50       2161 $label->content() =~ m/$looking_for/smx or next;
285 3 50       26 my ( $line, $char ) = @{ $label->location() || [] };
  3         12  
286 3 100 66     61 return $TRUE
      66        
287             if $line < $start_line ||
288             $line == $start_line && $char < $start_char;
289 2 100 33     25 return $TRUE
      66        
290             if $line > $end_line ||
291             $line == $end_line && $char > $end_char;
292 1         11 return;
293             }
294 2         7276 return $TRUE;
295             }
296              
297             # Any other form of goto can not be statically analyzed, and so is not
298             # an unambiguous transfer of control around the capture variable.
299 1         12 return;
300             }
301              
302             1;
303              
304             #-----------------------------------------------------------------------------
305              
306             __END__
307              
308             =pod
309              
310             =head1 NAME
311              
312             Perl::Critic::Policy::RegularExpressions::ProhibitCaptureWithoutTest - Capture variable used outside conditional.
313              
314             =head1 AFFILIATION
315              
316             This Policy is part of the core L<Perl::Critic|Perl::Critic>
317             distribution.
318              
319             =head1 DESCRIPTION
320              
321             If a regexp match fails, then any capture variables (C<$1>, C<$2>,
322             ...) will be unaffected. They will retain whatever old values they may
323             have had. Therefore it's important to check the return value of a match
324             before using those variables.
325              
326             '12312123' =~ /(2)/;
327             print $1; # Prints 2
328             '123123123' =~ /(X)/;
329             print $1; # Prints 2, because $1 has not changed.
330              
331             Note that because the values of C<$1> etc will be unaffected, you cannot
332             determine if a match succeeded by checking to see if the capture variables
333             have values.
334              
335             # WRONG
336             $str =~ /foo(.+)/;
337             if ( $1 ) {
338             print "I found $1 after 'foo'";
339             }
340              
341             This policy checks that the previous regexp for which the capture
342             variable is in-scope is either in a conditional or causes an exception
343             or other control transfer (i.e. C<next>, C<last>, C<redo>, C<return>, or
344             sometimes C<goto>) if the match fails.
345              
346             A C<goto> is only accepted by this policy if it is a co-routine call
347             (i.e. C<goto &foo>) or a C<goto LABEL> where the label does not fall
348             between the C<goto> and the capture variable in the scope of the
349             C<goto>. A computed C<goto> (i.e. something like C<goto (qw{foo bar
350             baz})[$i]>) is not accepted by this policy because its target can not be
351             statically determined.
352              
353             This policy does not check whether that conditional is actually
354             testing a regexp result, nor does it check whether a regexp actually
355             has a capture in it. Those checks are too hard.
356              
357             This policy also does not check arbitrarily complex conditionals guarding
358             regexp results, for pretty much the same reason. Simple things like
359              
360             m/(foo)/ or die "No foo!";
361             die "No foo!" unless m/(foo)/;
362              
363             will be handled, but something like
364              
365             m/(foo)/ or do {
366             ... lots of complicated calculations here ...
367             die "No foo!";
368             };
369              
370             are beyond its scope.
371              
372              
373             =head1 CONFIGURATION
374              
375             By default, this policy considers C<die>, C<croak>, and C<confess> to
376             throw exceptions. If you have additional subroutines or methods that may
377             be used in lieu of one of these, you can configure them in your
378             perlcriticrc as follows:
379              
380             [RegularExpressions::ProhibitCaptureWithoutTest]
381             exception_source = my_exception_generator
382              
383             =head1 BUGS
384              
385             This policy does not recognize named capture variables. Yet.
386              
387             =head1 AUTHOR
388              
389             Chris Dolan <cdolan@cpan.org>
390              
391             =head1 COPYRIGHT
392              
393             Copyright (c) 2006-2017 Chris Dolan.
394              
395             This program is free software; you can redistribute it and/or modify
396             it under the same terms as Perl itself.
397              
398             =cut
399              
400             # Local Variables:
401             # mode: cperl
402             # cperl-indent-level: 4
403             # fill-column: 78
404             # indent-tabs-mode: nil
405             # c-indentation-style: bsd
406             # End:
407             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :