File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm
Criterion Covered Total %
statement 285 290 98.2
branch 179 218 82.1
condition 55 71 77.4
subroutine 45 46 97.8
pod 4 5 80.0
total 568 630 90.1


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2              
3 40     40   31093 use 5.010001;
  40         199  
4 40     40   265 use strict;
  40         128  
  40         830  
5 40     40   246 use warnings;
  40         109  
  40         1005  
6              
7 40     40   237 use Carp;
  40         131  
  40         2567  
8 40     40   348 use English qw(-no_match_vars);
  40         112  
  40         260  
9 40     40   15032 use List::SomeUtils qw(none);
  40         163  
  40         2085  
10 40     40   366 use Readonly;
  40         128  
  40         2141  
11 40     40   357 use Scalar::Util qw(refaddr);
  40         122  
  40         2368  
12              
13 40     40   346 use Perl::Critic::Exception::Fatal::Internal qw{ throw_internal };
  40         148  
  40         2654  
14 40         2143 use Perl::Critic::Utils qw{
15             :booleans :characters :severities hashify precedence_of
16             split_nodes_on_comma
17 40     40   333 };
  40         148  
18 40     40   14659 use parent 'Perl::Critic::Policy';
  40         160  
  40         308  
19              
20             our $VERSION = '1.146';
21              
22             #-----------------------------------------------------------------------------
23              
24             Readonly::Scalar my $SPLIT => q{split};
25             Readonly::Scalar my $WHILE => q{while};
26              
27             Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
28             hashify( qw< ${^CAPTURE} > );
29             # TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
30             # as a use of capture variable n.
31             Readonly::Hash my %CAPTURE_REFERENCE => (
32             hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
33             %ZERO_BASED_CAPTURE_REFERENCE );
34             Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
35             hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
36             %CAPTURE_REFERENCE );
37             Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
38             Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
39             hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
40             %CAPTURE_ARRAY );
41              
42             Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
43             Readonly::Scalar my $EXPL => [252];
44              
45             #-----------------------------------------------------------------------------
46              
47 140     140 0 1809 sub supported_parameters { return qw() }
48 99     99 1 447 sub default_severity { return $SEVERITY_MEDIUM }
49 86     86 1 412 sub default_themes { return qw( core pbp maintenance ) }
50             sub applies_to {
51 81     81 1 292 return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >
52             }
53              
54             #-----------------------------------------------------------------------------
55              
56             Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
57              
58             sub violates {
59 145     145 1 367 my ( $self, $elem, $doc ) = @_;
60              
61             # optimization: don't bother parsing the regexp if there are no parens
62 145 100       468 return if 0 > index $elem->content(), '(';
63              
64 140 50       1155 my $re = $doc->ppix_regexp_from_element( $elem ) or return;
65 140 100       791700 $re->failures() and return;
66              
67 138 100       1150 my $ncaptures = $re->max_capture_number() or return;
68              
69 137         4220 my @captures = ( undef ) x $ncaptures; # List of expected captures
70              
71 137         251 my %named_captures; # List of expected named captures.
72             # Unlike the numbered capture logic, %named_captures
73             # entries are made undefined when a use of the name is
74             # found. Otherwise two hashes would be needed, one to
75             # become defined when a use is found, and one to hold
76             # the mapping of name to number.
77 137 100       226 foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
  137         359  
78             ) || [] } ) {
79             # There can be more than one capture with the same name, so we need to
80             # record all of them. There will be duplications if the 'branch reset'
81             # "(?| ... )" pattern is used, but this is benign given how numbered
82             # captures are recorded.
83 16   100     4065 push @{ $named_captures{ $struct->name() } ||= [] }, $struct->number();
  16         60  
84             }
85              
86             # Look for references to the capture in the regex itself
87 137 100       34213 return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
88              
89 120 100 100     532 if ( $re->modifier_asserted( 'g' )
90             and not _check_if_in_while_condition_or_block( $elem ) ) {
91 14         40 $ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
92 14         64 $#captures = $ncaptures - 1;
93             }
94              
95 120 100 100     2361 return if _enough_assignments($elem, \@captures) && !%named_captures;
96 87 100 100     350 return if _is_in_slurpy_array_context($elem) && !%named_captures;
97 71 100       241 return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
98              
99 25         153 return $self->violation( $DESC, $EXPL, $elem );
100             }
101              
102             # Find uses of both numbered and named capture variables in the regexp itself.
103             # Return true if all are used.
104             sub _enough_uses_in_regexp {
105 137     137   368 my ( $re, $captures, $named_captures, $doc ) = @_;
106              
107             # Look for references to the capture in the regex itself. Note that this
108             # will also find backreferences in the replacement string of s///.
109 137 100       226 foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
  137         406  
110             || [] } ) {
111 2 50       511 if ( $token->is_named() ) {
112 0         0 _record_named_capture( $token->name(), $captures, $named_captures );
113             } else {
114 2         13 _record_numbered_capture( $token->absolute(), $captures );
115             }
116             }
117              
118 137 100       36625 foreach my $token ( @{ $re->find(
  137         361  
119             'PPIx::Regexp::Token::Code' ) || [] } ) {
120 22 50       6573 my $ppi = $token->ppi() or next;
121 22         350 _check_node_children( $ppi, {
122             regexp => $re,
123             numbered_captures => $captures,
124             named_captures => $named_captures,
125             document => $doc,
126             }, _make_regexp_checker() );
127             }
128              
129 138     138   776 return ( none {not defined} @{$captures} )
130             && ( !%{$named_captures} ||
131 137   66 3   30829 none {defined} values %{$named_captures} );
  3         33  
132             }
133              
134             sub _enough_assignments {
135 120     120   278 my ($elem, $captures) = @_;
136              
137             # look backward for the assignment operator
138 120         588 my $psib = $elem->sprevious_sibling;
139             SIBLING:
140 120         3409 while (1) {
141 195 100       1935 return if !$psib;
142 116 100       382 if ($psib->isa('PPI::Token::Operator')) {
143 70 100       214 last SIBLING if q{=} eq $psib->content;
144 32 100       189 return if q{!~} eq $psib->content;
145             }
146 75         254 $psib = $psib->sprevious_sibling;
147             }
148              
149 38         278 $psib = $psib->sprevious_sibling;
150 38 50       797 return if !$psib; # syntax error: '=' at the beginning of a statement???
151              
152 38 100       170 if ($psib->isa('PPI::Token::Symbol')) {
    100          
    50          
153             # @foo = m/(foo)/
154             # @$foo = m/(foo)/
155             # %foo = m/(foo)/
156             # %$foo = m/(foo)/
157 13 50       55 return $TRUE if _symbol_is_slurpy($psib);
158              
159             } elsif ($psib->isa('PPI::Structure::Block')) {
160             # @{$foo} = m/(foo)/
161             # %{$foo} = m/(foo)/
162 2 50       10 return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
163              
164             } elsif ($psib->isa('PPI::Structure::List')) {
165             # () = m/(foo)/
166             # ($foo) = m/(foo)/
167             # ($foo,$bar) = m/(foo)(bar)/
168             # (@foo) = m/(foo)(bar)/
169             # ($foo,@foo) = m/(foo)(bar)/
170             # ($foo,@$foo) = m/(foo)(bar)/
171             # ($foo,@{$foo}) = m/(foo)(bar)/
172              
173 23         483 my @args = $psib->schildren;
174 23 100       272 return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
175              
176             # Forward looking: PPI might change in v1.200 so schild(0) is a
177             # PPI::Statement::Expression.
178 21 50 33     158 if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
179 21         92 @args = $args[0]->schildren;
180             }
181              
182 21         282 my @parts = split_nodes_on_comma(@args);
183             PART:
184 21         75 for my $i (0 .. $#parts) {
185 33 100       47 if (1 == @{$parts[$i]}) {
  33         88  
186 29         45 my $var = $parts[$i]->[0];
187 29 100 66     124 if ($var->isa('PPI::Token::Symbol') || $var->isa('PPI::Token::Cast')) {
188 24 100       61 return $TRUE if _has_array_sigil($var);
189             }
190             }
191 28         191 _record_numbered_capture( $i + 1, $captures );
192             # ith variable capture
193             }
194             }
195              
196 16     27   58 return none {not defined} @{$captures};
  27         146  
  16         63  
197             }
198              
199             sub _symbol_is_slurpy {
200 13     13   29 my ($symbol) = @_;
201              
202 13 100       37 return $TRUE if _has_array_sigil($symbol);
203 3 100       28 return $TRUE if _has_hash_sigil($symbol);
204 2 50       18 return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
205 0         0 return;
206             }
207              
208             sub _has_array_sigil {
209 37     37   74 my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
210              
211 37         96 return q{@} eq substr $elem->content, 0, 1;
212             }
213              
214             sub _has_hash_sigil {
215 3     3   11 my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
216              
217 3         9 return q{%} eq substr $elem->content, 0, 1;
218             }
219              
220             sub _is_preceded_by_array_or_hash_cast {
221 4     4   10 my ($elem) = @_;
222 4         17 my $psib = $elem->sprevious_sibling;
223 4         67 my $cast;
224 4   66     52 while ($psib && $psib->isa('PPI::Token::Cast')) {
225 4         11 $cast = $psib;
226 4         20 $psib = $psib->sprevious_sibling;
227             }
228 4 50       72 return if !$cast;
229 4         15 my $sigil = substr $cast->content, 0, 1;
230 4   66     88 return q{@} eq $sigil || q{%} eq $sigil;
231             }
232              
233             sub _is_in_slurpy_array_context {
234 87     87   219 my ($elem) = @_;
235              
236             # return true is the result of the regexp is passed to a subroutine.
237             # doesn't check for array context due to assignment.
238              
239             # look backward for explicit regex operator
240 87         238 my $psib = $elem->sprevious_sibling;
241 87 100 100     1533 if ($psib && $psib->content eq q{=~}) {
242             # Track back through value
243 2         16 $psib = _skip_lhs($psib);
244             }
245              
246 87 100       375 if (!$psib) {
247 60         232 my $parent = $elem->parent;
248 60 50       334 return if !$parent;
249 60 50       300 if ($parent->isa('PPI::Statement')) {
250 60         164 $parent = $parent->parent;
251 60 50       298 return if !$parent;
252             }
253              
254             # Return true if we have a list that isn't part of a foreach loop.
255             # TECHNICAL DEBT: This code is basically shared with
256             # RequireCheckingReturnValueOfEval. I don't want to put this code
257             # into Perl::Critic::Utils::*, but I don't have time to sort out
258             # PPIx::Utilities::Structure::List yet.
259 60 100       311 if ( $parent->isa('PPI::Structure::List') ) {
260 5 50       46 my $parent_statement = $parent->statement() or return $TRUE;
261 5 100       135 return $TRUE if not
262             $parent_statement->isa('PPI::Statement::Compound');
263 2 50       9 return $TRUE if $parent_statement->type() ne 'foreach';
264             }
265              
266 57 100       400 return $TRUE if $parent->isa('PPI::Structure::Constructor');
267 55 100       248 if ($parent->isa('PPI::Structure::Block')) {
268 5 100       18 return $TRUE
269             if
270             refaddr($elem->statement)
271             eq refaddr([$parent->schildren]->[-1]);
272             }
273 53         353 return;
274             }
275 27 100       135 if ($psib->isa('PPI::Token::Operator')) {
276             # most operators kill slurpiness (except assignment, which is handled elsewhere)
277 19 100       54 return $TRUE if q{,} eq $psib->content;
278 17         106 return;
279             }
280 8         61 return $TRUE;
281             }
282              
283             sub _skip_lhs {
284 2     2   5 my ($elem) = @_;
285              
286             # TODO: better implementation to handle casts, expressions, subcalls, etc.
287 2         8 $elem = $elem->sprevious_sibling();
288              
289 2         46 return $elem;
290             }
291              
292             sub _enough_magic {
293 71     71   171 my ($elem, $re, $captures, $named_captures, $doc) = @_;
294              
295 71         233 _check_for_magic($elem, $re, $captures, $named_captures, $doc);
296              
297 85     85   294 return ( none {not defined} @{$captures} )
298             && ( !%{$named_captures} ||
299 71   66 6   1067 none {defined} values %{$named_captures} );
  6         65  
300             }
301              
302             # void return
303             sub _check_for_magic {
304 71     71   166 my ($elem, $re, $captures, $named_captures, $doc) = @_;
305              
306             # Search for $1..$9 in :
307             # * the rest of this statement
308             # * subsequent sibling statements
309             # * if this is in a conditional boolean, the if/else bodies of the conditional
310             # * if this is in a while/for condition, the loop body
311             # But NO intervening regexps!
312              
313             # Package up the usual arguments for _check_rest_of_statement().
314 71         289 my $arg = {
315             regexp => $re,
316             numbered_captures => $captures,
317             named_captures => $named_captures,
318             document => $doc,
319             };
320              
321             # Capture whether or not the regular expression is negated -- that
322             # is, whether it is preceded by the '!~' binding operator.
323 71 100       213 if ( my $prior_token = $elem->sprevious_sibling() ) {
324 18   100     454 $arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
325             q<!~> eq $prior_token->content();
326             }
327              
328 71 100       1074 return if ! _check_rest_of_statement( $elem, $arg );
329              
330 66         229 my $parent = $elem->parent();
331 66   100     698 while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
332 165 100       932 return if ! _check_rest_of_statement( $parent, $arg );
333 131         377 $parent = $parent->parent();
334             }
335              
336 32         206 return;
337             }
338              
339             # Check if we are in the condition or block of a 'while'
340             sub _check_if_in_while_condition_or_block {
341 17     17   396 my ( $elem ) = @_;
342 17 50       84 $elem or return;
343              
344 17 50       87 my $parent = $elem->parent() or return;
345 17 50       194 $parent->isa( 'PPI::Statement' ) or return;
346              
347 17 50       57 my $item = $parent = $parent->parent() or return;
348 17 100       158 if ( $item->isa( 'PPI::Structure::Block' ) ) {
349 1 50       6 $item = $item->sprevious_sibling() or return;
350             }
351 17 100       145 $item->isa( 'PPI::Structure::Condition' ) or return;
352              
353 3 50       16 $item = $item->sprevious_sibling() or return;
354 3 50       130 $item->isa( 'PPI::Token::Word' ) or return;
355              
356 3         20 return $WHILE eq $item->content();
357             }
358              
359             {
360             # Shortcut operators '||', '//', and 'or' can cause everything after
361             # them to be skipped. 'and' trumps '||' and '//', and causes things
362             # to be evaluated again. The value is true to skip, false to cancel
363             # skipping.
364             Readonly::Hash my %SHORTCUT_OPERATOR => (
365             q<||> => $FALSE,
366             q<//> => $FALSE,
367             and => $TRUE,
368             or => $FALSE,
369             );
370              
371             # RT #38942
372             # The issue in the ticket is that in something like
373             # if ( /(a)/ || /(b)/ ) {
374             # say $1
375             # }
376             # the capture variable can come from either /(a)/ or /(b)/. If we
377             # don't take into account the short-cutting nature of the '||' we
378             # erroneously conclude that the capture in /(a)/ is not used. So we
379             # need to skip every regular expression after an alternation.
380             #
381             # The trick is that we want to still mark magic variables, because
382             # of code like
383             # my $foo = $1 || $2;
384             # so we can't just ignore everything after an alternation.
385             #
386             # To do all this correctly, we have to track precedence, and start
387             # paying attention again if an 'and' is found after a '||'.
388              
389             # Subroutine _make_regexp_checker() manufactures a snippet of code
390             # which is used to track regular expressions. It takes one optional
391             # argument, which is the snippet used to track the parent object's
392             # regular expressions.
393             #
394             # The snippet is passed each token encountered, and returns true if
395             # the scan for capture variables is to be stopped. This will happen
396             # if the token is a regular expression which is _not_ to the right
397             # of an alternation operator ('||', '//', or 'or'), or it _is_ to
398             # the right of an 'and', without an intervening alternation
399             # operator.
400             #
401             # If _make_regexp_checker() was passed a snippet which
402             # returns false on encountering a regular expression, the returned
403             # snippet always returns false, for the benefit of code like
404             # /(a)/ || ( /(b)/ || /(c)/ ).
405              
406             sub _make_regexp_checker {
407 596     596   876 my ( $parent ) = @_;
408              
409             $parent
410             and not $parent->()
411 596 50 66 0   1373 and return sub { return $FALSE };
  0         0  
412              
413 596         940 my $check = $TRUE;
414 596         779 my $precedence = 0;
415              
416             return sub {
417 1157     1157   1737 my ( $elem ) = @_;
418              
419 1157 100       2836 $elem or return $check;
420              
421 819 100       2588 if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
422 48 100       130 return _regexp_is_in_split( $elem ) ? $FALSE : $check;
423             }
424              
425 771 100 66     2375 if ( $elem->isa( 'PPI::Token::Structure' )
426             && q<;> eq $elem->content() ) {
427 87         463 $check = $TRUE;
428 87         155 $precedence = 0;
429 87         254 return $FALSE;
430             }
431              
432 684 100       2245 $elem->isa( 'PPI::Token::Operator' )
433             or return $FALSE;
434              
435 83         194 my $content = $elem->content();
436 83 100       547 defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
437             or return $FALSE;
438              
439 21         242 my $oper_precedence = precedence_of( $content );
440 21 100       156 $oper_precedence >= $precedence
441             or return $FALSE;
442              
443 20         34 $precedence = $oper_precedence;
444 20         41 $check = $oper_check;
445              
446 20         59 return $FALSE;
447 596         2116 };
448             }
449             }
450              
451             # Argument is regexp.
452             # True if it is the regexp in a split()
453             sub _regexp_is_in_split {
454 48     48   81 my ( $elem ) = @_;
455              
456 48         104 my $prev;
457 48 100       116 if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
458             # Maybe we have split( /.../, ... )
459 32 50       674 my $stmt = $elem->statement()
460             or return $FALSE;
461 32 50       584 my $list = $stmt->parent()
462             or return $FALSE;
463 32 50       220 $prev = $elem->sprevious_sibling()
464             or return $FALSE;
465             }
466 16   100     601 return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
467             }
468              
469              
470             # false if we hit another regexp
471             # The arguments are:
472             # $elem - The PPI::Element whose siblings are to be checked;
473             # $arg - A hash reference containing the following keys:
474             # regexp => the relevant PPIx::Regexp object;
475             # numbered_captures => a reference to the array used to track the
476             # use of numbered captures;
477             # named_captures => a reference to the hash used to track the
478             # use of named captures;
479             # negated => true if the regexp was bound to its target with the
480             # '!~' operator;
481             # document => a reference to the Perl::Critic::Document;
482             # Converted to passing the arguments everyone gets in a hash because of
483             # the need to add the 'negated' argument, which would put us at six
484             # arguments.
485             sub _check_rest_of_statement {
486 236     236   460 my ( $elem, $arg ) = @_;
487              
488 236         461 my $checker = _make_regexp_checker();
489 236         744 my $nsib = $elem->snext_sibling;
490              
491             # If we are an if (or elsif) and the result of the regexp is
492             # negated, we skip the first block found. RT #69867
493 236 100 100     4925 if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
494 1   33     33 while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
495 0         0 $nsib = $nsib->snext_sibling();
496             }
497 1 50       7 $nsib and $nsib = $nsib->snext_sibling();
498             }
499              
500 236         648 while ($nsib) {
501 203 100       2401 return if $checker->($nsib);
502 198 100       734 if ($nsib->isa('PPI::Node')) {
503 88 100       255 return if ! _check_node_children($nsib, $arg, $checker );
504             } else {
505             _mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
506 110         344 $arg->{named_captures}, $arg->{document} );
507             }
508 164         1152 $nsib = $nsib->snext_sibling;
509             }
510 197         2849 return $TRUE;
511             }
512              
513             {
514              
515             Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
516              
517             # Return true if the argument is the condition of an if or elsif
518             # statement, otherwise return false.
519             sub _is_condition_of_if_statement {
520 11     11   19 my ( $elem ) = @_;
521 11 100 66     90 $elem
522             and $elem->isa( 'PPI::Structure::Condition' )
523             or return $FALSE;
524 1 50       5 my $psib = $elem->sprevious_sibling()
525             or return $FALSE;
526 1 50       31 $psib->isa( 'PPI::Token::Word' )
527             or return $FALSE;
528 1         4 return $IS_IF_STATEMENT{ $psib->content() };
529              
530             }
531             }
532              
533             # false if we hit another regexp
534             # The arguments are:
535             # $elem - The PPI::Node whose children are to be checked;
536             # $arg - A hash reference containing the following keys:
537             # regexp => the relevant PPIx::Regexp object;
538             # numbered_captures => a reference to the array used to track the
539             # use of numbered captures;
540             # named_captures => a reference to the hash used to track the
541             # use of named captures;
542             # document => a reference to the Perl::Critic::Document;
543             # $parent_checker - The parent's regexp checking code snippet,
544             # manufactured by _make_regexp_checker(). This argument is not in
545             # the $arg hash because that hash is shared among levels of the
546             # parse tree, whereas the regexp checker is not.
547             # TODO the things in the $arg hash are widely shared among the various
548             # pieces/parts of this policy; maybe more subroutines should use this
549             # hash rather than passing all this stuff around as individual
550             # arguments. This particular subroutine got the hash-reference treatment
551             # because Subroutines::ProhibitManyArgs started complaining when the
552             # checker argument was added.
553             sub _check_node_children {
554 338     338   599 my ($elem, $arg, $parent_checker) = @_;
555              
556             # caveat: this will descend into subroutine definitions...
557              
558 338         545 my $checker = _make_regexp_checker($parent_checker);
559 338         1049 for my $child ($elem->schildren) {
560 616 100       5023 return if $checker->($child);
561 582 100       1831 if ($child->isa('PPI::Node')) {
562 228 100       478 return if ! _check_node_children($child, $arg, $checker);
563             } else {
564             _mark_magic($child, $arg->{regexp},
565             $arg->{numbered_captures}, $arg->{named_captures},
566 354         832 $arg->{document});
567             }
568             }
569 255         1960 return $TRUE;
570             }
571              
572             sub _mark_magic {
573 464     464   827 my ($elem, $re, $captures, $named_captures, $doc) = @_;
574              
575             # If we're a double-quotish element, we need to grub through its
576             # content. RT #38942
577 464 100       792 if ( _is_double_quotish_element( $elem ) ) {
578 14         50 _mark_magic_in_content(
579             $elem->content(), $re, $captures, $named_captures, $doc );
580 14         38 return;
581             }
582              
583             # Ditto a here document, though the logic is different. RT #38942
584 450 100       1436 if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
585 3 50       10 $elem->content() =~ m/ \A << ~? \s* ' /sxm
586             or _mark_magic_in_content(
587             join( $EMPTY, $elem->heredoc() ), $re, $captures,
588             $named_captures, $doc );
589 3         8 return;
590             }
591              
592             # Only interested in magic, or known English equivalent.
593 447         923 my $content = $elem->content();
594 447 100       1877 my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
595             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
596             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
597             $elem->isa( 'PPI::Token::Magic' )
598 447 100 100     2524 or $capture_ref->{$content}
599             or return;
600              
601 81 100       593 if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
    100          
    100          
602              
603             # Record if we see $1, $2, $3, ...
604 43         149 my $num = $1;
605 43 100       139 if (0 < $num) { # don't mark $0
606             # Only mark the captures we really need -- don't mark superfluous magic vars
607 42 100       68 if ($num <= @{$captures}) {
  42         112  
608 41         116 _record_numbered_capture( $num, $captures );
609             }
610             }
611             } elsif ( $capture_array->{$content} ) { # GitHub #778
612 1         10 foreach my $num ( 1 .. @{$captures} ) {
  1         5  
613 1         6 _record_numbered_capture( $num, $captures );
614             }
615             } elsif ( $capture_ref->{$content} ) {
616 30         443 _mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
617             }
618 81         277 return;
619             }
620              
621             # Record a named capture referenced by a hash or array found in code.
622             # The arguments are:
623             # $elem - The element that represents a subscripted capture variable;
624             # $re - The PPIx::Regexp object;
625             # $captures - A reference to the numbered capture array;
626             # $named_captures - A reference to the named capture hash.
627             sub _mark_magic_subscripted_code {
628 30     30   79 my ( $elem, $re, $captures, $named_captures ) = @_;
629 30 50       108 my $subscr = $elem->snext_sibling() or return;
630 30 50       894 $subscr->isa( 'PPI::Structure::Subscript' ) or return;
631 30         122 my $subval = $subscr->content();
632 30         1144 _record_subscripted_capture(
633             $elem->content(), $subval, $re, $captures, $named_captures );
634 30         75 return;
635             }
636              
637             # Find capture variables in the content of a double-quotish thing, and
638             # record their use. RT #38942. The arguments are:
639             # $content - The content() ( or heredoc() in the case of a here
640             # document) to be analyzed;
641             # $re - The PPIx::Regexp object;
642             # $captures - A reference to the numbered capture array;
643             # $named_captures - A reference to the named capture hash.
644             sub _mark_magic_in_content {
645 17     17   135 my ( $content, $re, $captures, $named_captures, $doc ) = @_;
646              
647 17 100       61 my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
648             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
649             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
650              
651 17         118 while ( $content =~ m< ( [\$\@] (?:
652             [{] \^? (?: \w+ | . ) [}] | \w+ | . ) ) >sxmg ) {
653 10         30 my $name = $1;
654 10 100       42 $name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
655             and $name =~ s/ [}] \z //sxm;
656              
657 10 100 33     55 if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
    100          
    50          
658              
659 5         11 my $num = $1;
660             0 < $num
661 5 50 33     18 and $num <= @{ $captures }
  5         27  
662             and _record_numbered_capture( $num, $captures );
663              
664             } elsif ( $capture_array->{$name} ) { # GitHub #778
665 1         8 foreach my $num ( 1 .. @{$captures} ) {
  1         6  
666 1         4 _record_numbered_capture( $num, $captures );
667             }
668              
669             } elsif ( $capture_ref->{$name} &&
670             $content =~ m/ \G ( [{] [^}]+ [}] | [[] [^]] []] ) /smxgc )
671             {
672 4         90 _record_subscripted_capture(
673             $name, $1, $re, $captures, $named_captures );
674              
675             }
676             }
677 17         47 return;
678             }
679              
680             # Return true if the given element is double-quotish. Always returns
681             # false for a PPI::Token::HereDoc, since they're a different beast.
682             # RT #38942.
683             sub _is_double_quotish_element {
684 464     464   703 my ( $elem ) = @_;
685              
686 464 50       1117 $elem or return;
687              
688 464         1034 my $content = $elem->content();
689              
690 464 50       2307 if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
691 0         0 return $content !~ m/ \A qx \s* ' /sxm;
692             }
693              
694 464         822 foreach my $class ( qw{
695             PPI::Token::Quote::Double
696             PPI::Token::Quote::Interpolate
697             PPI::Token::QuoteLike::Backtick
698             PPI::Token::QuoteLike::Readline
699             } ) {
700 1814 100       4685 $elem->isa( $class ) and return $TRUE;
701             }
702              
703 450         930 return $FALSE;
704             }
705              
706             # Record a subscripted capture, either hash dereference or array
707             # dereference. We assume that an array represents a numbered capture and
708             # a hash represents a named capture, since we have to handle (e.g.) both
709             # @+ and %+.
710             sub _record_subscripted_capture {
711 34     34   290 my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
712 34 100       261 if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
    50          
713 8         44 ( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
714 8         31 _record_named_capture( $name, $captures, $named_captures );
715             } elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
716             # GitHub #778
717             # Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
718             # But @{^CAPTURE} is zero-based, so we need to tweak the subscript
719             # before we record the capture number.
720 26         93 my $num = $1 + 0;
721             $num >= 0
722 26 100 100     130 and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
723             and $num++;
724 26         193 _record_numbered_capture( $num, $captures, $re );
725             }
726 34         98 return;
727             }
728              
729             # Because a named capture is also one or more numbered captures, the recording
730             # of the use of a named capture seemed complex enough to wrap in a subroutine.
731             sub _record_named_capture {
732 8     8   26 my ( $name, $captures, $named_captures ) = @_;
733 8 50       40 defined ( my $numbers = $named_captures->{$name} ) or return;
734 8         21 foreach my $capnum ( @{ $numbers } ) {
  8         26  
735 10         31 _record_numbered_capture( $capnum, $captures );
736             }
737 8         28 $named_captures->{$name} = undef;
738 8         19 return;
739             }
740              
741             sub _record_numbered_capture {
742 114     114   313 my ( $number, $captures, $re ) = @_;
743 114 100 100     370 $re and $number < 0
744             and $number = $re->max_capture_number() + $number + 1;
745 114 50       464 return if $number <= 0;
746 114         210 $captures->[ $number - 1 ] = 1;
747 114         315 return;
748             }
749              
750             1;
751              
752             __END__
753              
754             #-----------------------------------------------------------------------------
755              
756             =pod
757              
758             =for stopwords refactored
759              
760             =head1 NAME
761              
762             Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
763              
764              
765             =head1 AFFILIATION
766              
767             This Policy is part of the core L<Perl::Critic|Perl::Critic>
768             distribution.
769              
770              
771             =head1 DESCRIPTION
772              
773             Perl regular expressions have multiple types of grouping syntax. The
774             basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
775             C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/> are useful because
776             they have better runtime performance and do not copy strings to the
777             magic global capture variables.
778              
779             It's also easier on the maintenance programmer if you consistently use
780             capturing vs. non-capturing groups, because that programmer can tell
781             more easily which regexps can be refactored without breaking
782             surrounding code which may use the captured values.
783              
784              
785             =head1 CONFIGURATION
786              
787             This Policy is not configurable except for the standard options.
788              
789              
790             =head1 CAVEATS
791              
792             =head2 C<qr//> interpolation
793              
794             This policy can be confused by interpolation of C<qr//> elements, but
795             those are always false negatives. For example:
796              
797             my $foo_re = qr/(foo)/;
798             my ($foo) = m/$foo_re (bar)/x;
799              
800             A human can tell that this should be a violation because there are two
801             captures but only the first capture is used, not the second. The
802             policy only notices that there is one capture in the regexp and
803             remains happy.
804              
805             =head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
806              
807             This policy will only recognize capture groups referred to by these
808             variables if the use is subscripted by a literal integer.
809              
810             =head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
811              
812             This policy will not recognize capture groups referred to only by these
813             variables, because there is in general no way by static analysis to
814             determine which capture group is referred to. For example,
815              
816             m/ (?: (A[[:alpha:]]+) | (N\d+) ) (?{$foo=$^N}) /smx
817              
818             makes use of the first capture group if it matches, or the second
819             capture group if the first does not match but the second does.
820              
821             =head2 split()
822              
823             Normally, this policy thinks that if a capture is used at all it must be
824             used before the next regular expression in the same scope. The regular
825             expression in a C<split()> needs to be exempted because it does not
826             affect the caller's capture variables.
827              
828             At present, this policy recognizes and exempts the regular expressions
829             in
830              
831             split /.../, ...
832              
833             and
834              
835             split( /.../, ... )
836              
837             but more exotic syntax may produce false positives.
838              
839              
840             =head1 CREDITS
841              
842             Initial development of this policy was supported by a grant from the
843             Perl Foundation.
844              
845              
846             =head1 AUTHOR
847              
848             Chris Dolan <cdolan@cpan.org>
849              
850              
851             =head1 COPYRIGHT
852              
853             Copyright (c) 2007-2021 Chris Dolan. Many rights reserved.
854              
855             This program is free software; you can redistribute it and/or modify
856             it under the same terms as Perl itself. The full text of this license
857             can be found in the LICENSE file included with this module
858              
859             =cut
860              
861             # Local Variables:
862             # mode: cperl
863             # cperl-indent-level: 4
864             # fill-column: 78
865             # indent-tabs-mode: nil
866             # c-indentation-style: bsd
867             # End:
868             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :