File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm
Criterion Covered Total %
statement 284 289 98.2
branch 177 216 81.9
condition 55 71 77.4
subroutine 45 46 97.8
pod 4 5 80.0
total 565 627 90.1


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2              
3 40     40   31357 use 5.010001;
  40         203  
4 40     40   273 use strict;
  40         133  
  40         827  
5 40     40   244 use warnings;
  40         137  
  40         1081  
6              
7 40     40   271 use Carp;
  40         121  
  40         2649  
8 40     40   333 use English qw(-no_match_vars);
  40         105  
  40         283  
9 40     40   15083 use List::SomeUtils qw(none);
  40         144  
  40         2122  
10 40     40   314 use Readonly;
  40         129  
  40         2181  
11 40     40   316 use Scalar::Util qw(refaddr);
  40         142  
  40         2412  
12              
13 40     40   342 use Perl::Critic::Exception::Fatal::Internal qw{ throw_internal };
  40         129  
  40         2631  
14 40         2090 use Perl::Critic::Utils qw{
15             :booleans :characters :severities hashify precedence_of
16             split_nodes_on_comma
17 40     40   349 };
  40         145  
18 40     40   14330 use parent 'Perl::Critic::Policy';
  40         160  
  40         283  
19              
20             our $VERSION = '1.148';
21              
22             #-----------------------------------------------------------------------------
23              
24             Readonly::Scalar my $SPLIT => q{split};
25             Readonly::Scalar my $WHILE => q{while};
26              
27             Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
28             hashify( qw< ${^CAPTURE} > );
29             # TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
30             # as a use of capture variable n.
31             Readonly::Hash my %CAPTURE_REFERENCE => (
32             hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
33             %ZERO_BASED_CAPTURE_REFERENCE );
34             Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
35             hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
36             %CAPTURE_REFERENCE );
37             Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
38             Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
39             hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
40             %CAPTURE_ARRAY );
41              
42             Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
43             Readonly::Scalar my $EXPL => [252];
44              
45             #-----------------------------------------------------------------------------
46              
47 140     140 0 1868 sub supported_parameters { return qw() }
48 99     99 1 427 sub default_severity { return $SEVERITY_MEDIUM }
49 86     86 1 338 sub default_themes { return qw( core pbp maintenance ) }
50             sub applies_to {
51 81     81 1 265 return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >
52             }
53              
54             #-----------------------------------------------------------------------------
55              
56             Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
57              
58             sub violates {
59 145     145 1 352 my ( $self, $elem, $doc ) = @_;
60              
61             # optimization: don't bother parsing the regexp if there are no parens
62 145 100       493 return if 0 > index $elem->content(), '(';
63              
64 140 50       1026 my $re = $doc->ppix_regexp_from_element( $elem ) or return;
65 140 100       907996 $re->failures() and return;
66              
67 138 100       1091 my $ncaptures = $re->max_capture_number() or return;
68              
69 137         4209 my @captures = ( undef ) x $ncaptures; # List of expected captures
70              
71 137         300 my %named_captures; # List of expected named captures.
72             # Unlike the numbered capture logic, %named_captures
73             # entries are made undefined when a use of the name is
74             # found. Otherwise two hashes would be needed, one to
75             # become defined when a use is found, and one to hold
76             # the mapping of name to number.
77 137 100       235 foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
  137         380  
78             ) || [] } ) {
79             # There can be more than one capture with the same name, so we need to
80             # record all of them. There will be duplications if the 'branch reset'
81             # "(?| ... )" pattern is used, but this is benign given how numbered
82             # captures are recorded.
83 16   100     4678 push @{ $named_captures{ $struct->name() } ||= [] }, $struct->number();
  16         48  
84             }
85              
86             # Look for references to the capture in the regex itself
87 137 100       40998 return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
88              
89 120 100 100     569 if ( $re->modifier_asserted( 'g' )
90             and not _check_if_in_while_condition_or_block( $elem ) ) {
91 14         39 $ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
92 14         73 $#captures = $ncaptures - 1;
93             }
94              
95 120 100 100     2558 return if _enough_assignments($elem, \@captures) && !%named_captures;
96 87 100 100     325 return if _is_in_slurpy_array_context($elem) && !%named_captures;
97 71 100       309 return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
98              
99 25         165 return $self->violation( $DESC, $EXPL, $elem );
100             }
101              
102             # Find uses of both numbered and named capture variables in the regexp itself.
103             # Return true if all are used.
104             sub _enough_uses_in_regexp {
105 137     137   352 my ( $re, $captures, $named_captures, $doc ) = @_;
106              
107             # Look for references to the capture in the regex itself. Note that this
108             # will also find backreferences in the replacement string of s///.
109 137 100       239 foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
  137         364  
110             || [] } ) {
111 2 50       642 if ( $token->is_named() ) {
112 0         0 _record_named_capture( $token->name(), $captures, $named_captures );
113             } else {
114 2         17 _record_numbered_capture( $token->absolute(), $captures );
115             }
116             }
117              
118 137 100       42577 foreach my $token ( @{ $re->find(
  137         335  
119             'PPIx::Regexp::Token::Code' ) || [] } ) {
120 22 50       7911 my $ppi = $token->ppi() or next;
121 22         342 _check_node_children( $ppi, {
122             regexp => $re,
123             numbered_captures => $captures,
124             named_captures => $named_captures,
125             document => $doc,
126             }, _make_regexp_checker() );
127             }
128              
129 138     138   716 return ( none {not defined} @{$captures} )
130             && ( !%{$named_captures} ||
131 137   66 3   35726 none {defined} values %{$named_captures} );
  3         26  
132             }
133              
134             sub _enough_assignments {
135 120     120   286 my ($elem, $captures) = @_;
136              
137             # look backward for the assignment operator
138 120         412 my $psib = $elem->sprevious_sibling;
139             SIBLING:
140 120         3524 while (1) {
141 195 100       2385 return if !$psib;
142 116 100       460 if ($psib->isa('PPI::Token::Operator')) {
143 70 100       202 last SIBLING if q{=} eq $psib->content;
144 32 100       220 return if q{!~} eq $psib->content;
145             }
146 75         288 $psib = $psib->sprevious_sibling;
147             }
148              
149 38         326 $psib = $psib->sprevious_sibling;
150 38 50       1020 return if !$psib; # syntax error: '=' at the beginning of a statement???
151              
152 38 100       153 if ($psib->isa('PPI::Token::Symbol')) {
    100          
    50          
153             # @foo = m/(foo)/
154             # @$foo = m/(foo)/
155             # %foo = m/(foo)/
156             # %$foo = m/(foo)/
157 13 50       52 return $TRUE if _symbol_is_slurpy($psib);
158              
159             } elsif ($psib->isa('PPI::Structure::Block')) {
160             # @{$foo} = m/(foo)/
161             # %{$foo} = m/(foo)/
162 2 50       8 return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
163              
164             } elsif ($psib->isa('PPI::Structure::List')) {
165             # () = m/(foo)/
166             # ($foo) = m/(foo)/
167             # ($foo,$bar) = m/(foo)(bar)/
168             # (@foo) = m/(foo)(bar)/
169             # ($foo,@foo) = m/(foo)(bar)/
170             # ($foo,@$foo) = m/(foo)(bar)/
171             # ($foo,@{$foo}) = m/(foo)(bar)/
172              
173 23         513 my @args = $psib->schildren;
174 23 100       352 return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
175              
176             # Forward looking: PPI might change in v1.200 so schild(0) is a
177             # PPI::Statement::Expression.
178 21 50 33     141 if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
179 21         71 @args = $args[0]->schildren;
180             }
181              
182 21         296 my @parts = split_nodes_on_comma(@args);
183             PART:
184 21         90 for my $i (0 .. $#parts) {
185 33 100       64 if (1 == @{$parts[$i]}) {
  33         103  
186 29         58 my $var = $parts[$i]->[0];
187 29 100 66     126 if ($var->isa('PPI::Token::Symbol') || $var->isa('PPI::Token::Cast')) {
188 24 100       63 return $TRUE if _has_array_sigil($var);
189             }
190             }
191 28         197 _record_numbered_capture( $i + 1, $captures );
192             # ith variable capture
193             }
194             }
195              
196 16     27   84 return none {not defined} @{$captures};
  27         152  
  16         68  
197             }
198              
199             sub _symbol_is_slurpy {
200 13     13   44 my ($symbol) = @_;
201              
202 13 100       34 return $TRUE if _has_array_sigil($symbol);
203 3 100       31 return $TRUE if _has_hash_sigil($symbol);
204 2 50       17 return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
205 0         0 return;
206             }
207              
208             sub _has_array_sigil {
209 37     37   77 my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
210              
211 37         105 return q{@} eq substr $elem->content, 0, 1;
212             }
213              
214             sub _has_hash_sigil {
215 3     3   10 my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
216              
217 3         14 return q{%} eq substr $elem->content, 0, 1;
218             }
219              
220             sub _is_preceded_by_array_or_hash_cast {
221 4     4   9 my ($elem) = @_;
222 4         13 my $psib = $elem->sprevious_sibling;
223 4         75 my $cast;
224 4   66     33 while ($psib && $psib->isa('PPI::Token::Cast')) {
225 4         10 $cast = $psib;
226 4         20 $psib = $psib->sprevious_sibling;
227             }
228 4 50       81 return if !$cast;
229 4         14 my $sigil = substr $cast->content, 0, 1;
230 4   66     63 return q{@} eq $sigil || q{%} eq $sigil;
231             }
232              
233             sub _is_in_slurpy_array_context {
234 87     87   209 my ($elem) = @_;
235              
236             # return true is the result of the regexp is passed to a subroutine.
237             # doesn't check for array context due to assignment.
238              
239             # look backward for explicit regex operator
240 87         239 my $psib = $elem->sprevious_sibling;
241 87 100 100     1904 if ($psib && $psib->content eq q{=~}) {
242             # Track back through value
243 2         21 $psib = _skip_lhs($psib);
244             }
245              
246 87 100       527 if (!$psib) {
247 60         205 my $parent = $elem->parent;
248 60 50       377 return if !$parent;
249 60 50       244 if ($parent->isa('PPI::Statement')) {
250 60         160 $parent = $parent->parent;
251 60 50       345 return if !$parent;
252             }
253              
254             # Return true if we have a list that isn't part of a foreach loop.
255             # TECHNICAL DEBT: This code is basically shared with
256             # RequireCheckingReturnValueOfEval. I don't want to put this code
257             # into Perl::Critic::Utils::*, but I don't have time to sort out
258             # PPIx::Utilities::Structure::List yet.
259 60 100       327 if ( $parent->isa('PPI::Structure::List') ) {
260 5 50       67 my $parent_statement = $parent->statement() or return $TRUE;
261 5 100       170 return $TRUE if not
262             $parent_statement->isa('PPI::Statement::Compound');
263 2 50       9 return $TRUE if $parent_statement->type() ne 'foreach';
264             }
265              
266 57 100       438 return $TRUE if $parent->isa('PPI::Structure::Constructor');
267 55 100       238 if ($parent->isa('PPI::Structure::Block')) {
268 5 100       25 return $TRUE
269             if
270             refaddr($elem->statement)
271             eq refaddr([$parent->schildren]->[-1]);
272             }
273 53         341 return;
274             }
275 27 100       115 if ($psib->isa('PPI::Token::Operator')) {
276             # Most operators kill slurpiness (except assignment, which is handled elsewhere).
277 19         57 return q{,} eq $psib->content;
278             }
279 8         86 return $TRUE;
280             }
281              
282             sub _skip_lhs {
283 2     2   8 my ($elem) = @_;
284              
285             # TODO: better implementation to handle casts, expressions, subcalls, etc.
286 2         10 $elem = $elem->sprevious_sibling();
287              
288 2         58 return $elem;
289             }
290              
291             sub _enough_magic {
292 71     71   181 my ($elem, $re, $captures, $named_captures, $doc) = @_;
293              
294 71         242 _check_for_magic($elem, $re, $captures, $named_captures, $doc);
295              
296 85     85   331 return ( none {not defined} @{$captures} )
297             && ( !%{$named_captures} ||
298 71   66 6   1194 none {defined} values %{$named_captures} );
  6         58  
299             }
300              
301             # void return
302             sub _check_for_magic {
303 71     71   177 my ($elem, $re, $captures, $named_captures, $doc) = @_;
304              
305             # Search for $1..$9 in :
306             # * the rest of this statement
307             # * subsequent sibling statements
308             # * if this is in a conditional boolean, the if/else bodies of the conditional
309             # * if this is in a while/for condition, the loop body
310             # But NO intervening regexps!
311              
312             # Package up the usual arguments for _check_rest_of_statement().
313 71         312 my $arg = {
314             regexp => $re,
315             numbered_captures => $captures,
316             named_captures => $named_captures,
317             document => $doc,
318             };
319              
320             # Capture whether or not the regular expression is negated -- that
321             # is, whether it is preceded by the '!~' binding operator.
322 71 100       237 if ( my $prior_token = $elem->sprevious_sibling() ) {
323 18   100     525 $arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
324             q<!~> eq $prior_token->content();
325             }
326              
327 71 100       1158 return if ! _check_rest_of_statement( $elem, $arg );
328              
329 66         226 my $parent = $elem->parent();
330 66   100     642 while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
331 165 100       1006 return if ! _check_rest_of_statement( $parent, $arg );
332 131         420 $parent = $parent->parent();
333             }
334              
335 32         269 return;
336             }
337              
338             # Check if we are in the condition or block of a 'while'
339             sub _check_if_in_while_condition_or_block {
340 17     17   497 my ( $elem ) = @_;
341 17 50       105 $elem or return;
342              
343 17 50       117 my $parent = $elem->parent() or return;
344 17 50       223 $parent->isa( 'PPI::Statement' ) or return;
345              
346 17 50       64 my $item = $parent = $parent->parent() or return;
347 17 100       161 if ( $item->isa( 'PPI::Structure::Block' ) ) {
348 1 50       5 $item = $item->sprevious_sibling() or return;
349             }
350 17 100       165 $item->isa( 'PPI::Structure::Condition' ) or return;
351              
352 3 50       16 $item = $item->sprevious_sibling() or return;
353 3 50       112 $item->isa( 'PPI::Token::Word' ) or return;
354              
355 3         15 return $WHILE eq $item->content();
356             }
357              
358             {
359             # Shortcut operators '||', '//', and 'or' can cause everything after
360             # them to be skipped. 'and' trumps '||' and '//', and causes things
361             # to be evaluated again. The value is true to skip, false to cancel
362             # skipping.
363             Readonly::Hash my %SHORTCUT_OPERATOR => (
364             q<||> => $FALSE,
365             q<//> => $FALSE,
366             and => $TRUE,
367             or => $FALSE,
368             );
369              
370             # RT #38942
371             # The issue in the ticket is that in something like
372             # if ( /(a)/ || /(b)/ ) {
373             # say $1
374             # }
375             # the capture variable can come from either /(a)/ or /(b)/. If we
376             # don't take into account the short-cutting nature of the '||' we
377             # erroneously conclude that the capture in /(a)/ is not used. So we
378             # need to skip every regular expression after an alternation.
379             #
380             # The trick is that we want to still mark magic variables, because
381             # of code like
382             # my $foo = $1 || $2;
383             # so we can't just ignore everything after an alternation.
384             #
385             # To do all this correctly, we have to track precedence, and start
386             # paying attention again if an 'and' is found after a '||'.
387              
388             # Subroutine _make_regexp_checker() manufactures a snippet of code
389             # which is used to track regular expressions. It takes one optional
390             # argument, which is the snippet used to track the parent object's
391             # regular expressions.
392             #
393             # The snippet is passed each token encountered, and returns true if
394             # the scan for capture variables is to be stopped. This will happen
395             # if the token is a regular expression which is _not_ to the right
396             # of an alternation operator ('||', '//', or 'or'), or it _is_ to
397             # the right of an 'and', without an intervening alternation
398             # operator.
399             #
400             # If _make_regexp_checker() was passed a snippet which
401             # returns false on encountering a regular expression, the returned
402             # snippet always returns false, for the benefit of code like
403             # /(a)/ || ( /(b)/ || /(c)/ ).
404              
405             sub _make_regexp_checker {
406 596     596   1086 my ( $parent ) = @_;
407              
408             $parent
409             and not $parent->()
410 596 50 66 0   1465 and return sub { return $FALSE };
  0         0  
411              
412 596         1049 my $check = $TRUE;
413 596         871 my $precedence = 0;
414              
415             return sub {
416 1157     1157   2066 my ( $elem ) = @_;
417              
418 1157 100       3458 $elem or return $check;
419              
420 819 100       2890 if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
421 48 100       134 return _regexp_is_in_split( $elem ) ? $FALSE : $check;
422             }
423              
424 771 100 66     2589 if ( $elem->isa( 'PPI::Token::Structure' )
425             && q<;> eq $elem->content() ) {
426 87         528 $check = $TRUE;
427 87         140 $precedence = 0;
428 87         307 return $FALSE;
429             }
430              
431 684 100       2674 $elem->isa( 'PPI::Token::Operator' )
432             or return $FALSE;
433              
434 83         237 my $content = $elem->content();
435 83 100       565 defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
436             or return $FALSE;
437              
438 21         305 my $oper_precedence = precedence_of( $content );
439 21 100       186 $oper_precedence >= $precedence
440             or return $FALSE;
441              
442 20         49 $precedence = $oper_precedence;
443 20         38 $check = $oper_check;
444              
445 20         62 return $FALSE;
446 596         2537 };
447             }
448             }
449              
450             # Argument is regexp.
451             # True if it is the regexp in a split()
452             sub _regexp_is_in_split {
453 48     48   138 my ( $elem ) = @_;
454              
455 48         85 my $prev;
456 48 100       129 if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
457             # Maybe we have split( /.../, ... )
458 32 50       758 my $stmt = $elem->statement()
459             or return $FALSE;
460 32 50       669 my $list = $stmt->parent()
461             or return $FALSE;
462 32 50       254 $prev = $elem->sprevious_sibling()
463             or return $FALSE;
464             }
465 16   100     617 return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
466             }
467              
468              
469             # false if we hit another regexp
470             # The arguments are:
471             # $elem - The PPI::Element whose siblings are to be checked;
472             # $arg - A hash reference containing the following keys:
473             # regexp => the relevant PPIx::Regexp object;
474             # numbered_captures => a reference to the array used to track the
475             # use of numbered captures;
476             # named_captures => a reference to the hash used to track the
477             # use of named captures;
478             # negated => true if the regexp was bound to its target with the
479             # '!~' operator;
480             # document => a reference to the Perl::Critic::Document;
481             # Converted to passing the arguments everyone gets in a hash because of
482             # the need to add the 'negated' argument, which would put us at six
483             # arguments.
484             sub _check_rest_of_statement {
485 236     236   514 my ( $elem, $arg ) = @_;
486              
487 236         464 my $checker = _make_regexp_checker();
488 236         763 my $nsib = $elem->snext_sibling;
489              
490             # If we are an if (or elsif) and the result of the regexp is
491             # negated, we skip the first block found. RT #69867
492 236 100 100     5226 if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
493 1   33     24 while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
494 0         0 $nsib = $nsib->snext_sibling();
495             }
496 1 50       7 $nsib and $nsib = $nsib->snext_sibling();
497             }
498              
499 236         686 while ($nsib) {
500 203 100       2744 return if $checker->($nsib);
501 198 100       821 if ($nsib->isa('PPI::Node')) {
502 88 100       286 return if ! _check_node_children($nsib, $arg, $checker );
503             } else {
504             _mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
505 110         359 $arg->{named_captures}, $arg->{document} );
506             }
507 164         1305 $nsib = $nsib->snext_sibling;
508             }
509 197         3313 return $TRUE;
510             }
511              
512             {
513              
514             Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
515              
516             # Return true if the argument is the condition of an if or elsif
517             # statement, otherwise return false.
518             sub _is_condition_of_if_statement {
519 11     11   25 my ( $elem ) = @_;
520 11 100 66     87 $elem
521             and $elem->isa( 'PPI::Structure::Condition' )
522             or return $FALSE;
523 1 50       6 my $psib = $elem->sprevious_sibling()
524             or return $FALSE;
525 1 50       28 $psib->isa( 'PPI::Token::Word' )
526             or return $FALSE;
527 1         3 return $IS_IF_STATEMENT{ $psib->content() };
528              
529             }
530             }
531              
532             # false if we hit another regexp
533             # The arguments are:
534             # $elem - The PPI::Node whose children are to be checked;
535             # $arg - A hash reference containing the following keys:
536             # regexp => the relevant PPIx::Regexp object;
537             # numbered_captures => a reference to the array used to track the
538             # use of numbered captures;
539             # named_captures => a reference to the hash used to track the
540             # use of named captures;
541             # document => a reference to the Perl::Critic::Document;
542             # $parent_checker - The parent's regexp checking code snippet,
543             # manufactured by _make_regexp_checker(). This argument is not in
544             # the $arg hash because that hash is shared among levels of the
545             # parse tree, whereas the regexp checker is not.
546             # TODO the things in the $arg hash are widely shared among the various
547             # pieces/parts of this policy; maybe more subroutines should use this
548             # hash rather than passing all this stuff around as individual
549             # arguments. This particular subroutine got the hash-reference treatment
550             # because Subroutines::ProhibitManyArgs started complaining when the
551             # checker argument was added.
552             sub _check_node_children {
553 338     338   665 my ($elem, $arg, $parent_checker) = @_;
554              
555             # caveat: this will descend into subroutine definitions...
556              
557 338         677 my $checker = _make_regexp_checker($parent_checker);
558 338         1108 for my $child ($elem->schildren) {
559 616 100       5826 return if $checker->($child);
560 582 100       2348 if ($child->isa('PPI::Node')) {
561 228 100       749 return if ! _check_node_children($child, $arg, $checker);
562             } else {
563             _mark_magic($child, $arg->{regexp},
564             $arg->{numbered_captures}, $arg->{named_captures},
565 354         1049 $arg->{document});
566             }
567             }
568 255         2239 return $TRUE;
569             }
570              
571             sub _mark_magic {
572 464     464   1000 my ($elem, $re, $captures, $named_captures, $doc) = @_;
573              
574             # If we're a double-quotish element, we need to grub through its
575             # content. RT #38942
576 464 100       907 if ( _is_double_quotish_element( $elem ) ) {
577 14         64 _mark_magic_in_content(
578             $elem->content(), $re, $captures, $named_captures, $doc );
579 14         38 return;
580             }
581              
582             # Ditto a here document, though the logic is different. RT #38942
583 450 100       1446 if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
584 3 50       12 $elem->content() =~ m/ \A << ~? \s* ' /sxm
585             or _mark_magic_in_content(
586             join( $EMPTY, $elem->heredoc() ), $re, $captures,
587             $named_captures, $doc );
588 3         10 return;
589             }
590              
591             # Only interested in magic, or known English equivalent.
592 447         1027 my $content = $elem->content();
593 447 100       2252 my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
594             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
595             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
596             $elem->isa( 'PPI::Token::Magic' )
597 447 100 100     2847 or $capture_ref->{$content}
598             or return;
599              
600 81 100       601 if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
    100          
    100          
601              
602             # Record if we see $1, $2, $3, ...
603 43         153 my $num = $1;
604 43 100       122 if (0 < $num) { # don't mark $0
605             # Only mark the captures we really need -- don't mark superfluous magic vars
606 42 100       80 if ($num <= @{$captures}) {
  42         120  
607 41         138 _record_numbered_capture( $num, $captures );
608             }
609             }
610             } elsif ( $capture_array->{$content} ) { # GitHub #778
611 1         11 foreach my $num ( 1 .. @{$captures} ) {
  1         6  
612 1         5 _record_numbered_capture( $num, $captures );
613             }
614             } elsif ( $capture_ref->{$content} ) {
615 30         452 _mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
616             }
617 81         317 return;
618             }
619              
620             # Record a named capture referenced by a hash or array found in code.
621             # The arguments are:
622             # $elem - The element that represents a subscripted capture variable;
623             # $re - The PPIx::Regexp object;
624             # $captures - A reference to the numbered capture array;
625             # $named_captures - A reference to the named capture hash.
626             sub _mark_magic_subscripted_code {
627 30     30   105 my ( $elem, $re, $captures, $named_captures ) = @_;
628 30 50       103 my $subscr = $elem->snext_sibling() or return;
629 30 50       983 $subscr->isa( 'PPI::Structure::Subscript' ) or return;
630 30         104 my $subval = $subscr->content();
631 30         1115 _record_subscripted_capture(
632             $elem->content(), $subval, $re, $captures, $named_captures );
633 30         74 return;
634             }
635              
636             # Find capture variables in the content of a double-quotish thing, and
637             # record their use. RT #38942. The arguments are:
638             # $content - The content() ( or heredoc() in the case of a here
639             # document) to be analyzed;
640             # $re - The PPIx::Regexp object;
641             # $captures - A reference to the numbered capture array;
642             # $named_captures - A reference to the named capture hash.
643             sub _mark_magic_in_content {
644 17     17   151 my ( $content, $re, $captures, $named_captures, $doc ) = @_;
645              
646 17 100       71 my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
647             ( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
648             ( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
649              
650 17         134 while ( $content =~ m< ( [\$\@] (?:
651             [{] \^? (?: \w+ | . ) [}] | \w+ | . ) ) >sxmg ) {
652 10         41 my $name = $1;
653 10 100       43 $name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
654             and $name =~ s/ [}] \z //sxm;
655              
656 10 100 33     49 if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
    100          
    50          
657              
658 5         11 my $num = $1;
659             0 < $num
660 5 50 33     21 and $num <= @{ $captures }
  5         24  
661             and _record_numbered_capture( $num, $captures );
662              
663             } elsif ( $capture_array->{$name} ) { # GitHub #778
664 1         10 foreach my $num ( 1 .. @{$captures} ) {
  1         4  
665 1         5 _record_numbered_capture( $num, $captures );
666             }
667              
668             } elsif ( $capture_ref->{$name} &&
669             $content =~ m/ \G ( [{] [^}]+ [}] | [[] [^]] []] ) /smxgc )
670             {
671 4         129 _record_subscripted_capture(
672             $name, $1, $re, $captures, $named_captures );
673              
674             }
675             }
676 17         43 return;
677             }
678              
679             # Return true if the given element is double-quotish. Always returns
680             # false for a PPI::Token::HereDoc, since they're a different beast.
681             # RT #38942.
682             sub _is_double_quotish_element {
683 464     464   847 my ( $elem ) = @_;
684              
685 464 50       1261 $elem or return;
686              
687 464         1224 my $content = $elem->content();
688              
689 464 50       2777 if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
690 0         0 return $content !~ m/ \A qx \s* ' /sxm;
691             }
692              
693 464         952 foreach my $class ( qw{
694             PPI::Token::Quote::Double
695             PPI::Token::Quote::Interpolate
696             PPI::Token::QuoteLike::Backtick
697             PPI::Token::QuoteLike::Readline
698             } ) {
699 1814 100       5541 $elem->isa( $class ) and return $TRUE;
700             }
701              
702 450         1175 return $FALSE;
703             }
704              
705             # Record a subscripted capture, either hash dereference or array
706             # dereference. We assume that an array represents a numbered capture and
707             # a hash represents a named capture, since we have to handle (e.g.) both
708             # @+ and %+.
709             sub _record_subscripted_capture {
710 34     34   176 my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
711 34 100       260 if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
    50          
712 8         42 ( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
713 8         32 _record_named_capture( $name, $captures, $named_captures );
714             } elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
715             # GitHub #778
716             # Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
717             # But @{^CAPTURE} is zero-based, so we need to tweak the subscript
718             # before we record the capture number.
719 26         131 my $num = $1 + 0;
720             $num >= 0
721 26 100 100     147 and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
722             and $num++;
723 26         206 _record_numbered_capture( $num, $captures, $re );
724             }
725 34         87 return;
726             }
727              
728             # Because a named capture is also one or more numbered captures, the recording
729             # of the use of a named capture seemed complex enough to wrap in a subroutine.
730             sub _record_named_capture {
731 8     8   23 my ( $name, $captures, $named_captures ) = @_;
732 8 50       33 defined ( my $numbers = $named_captures->{$name} ) or return;
733 8         19 foreach my $capnum ( @{ $numbers } ) {
  8         22  
734 10         26 _record_numbered_capture( $capnum, $captures );
735             }
736 8         28 $named_captures->{$name} = undef;
737 8         22 return;
738             }
739              
740             sub _record_numbered_capture {
741 114     114   302 my ( $number, $captures, $re ) = @_;
742 114 100 100     371 $re and $number < 0
743             and $number = $re->max_capture_number() + $number + 1;
744 114 50       539 return if $number <= 0;
745 114         305 $captures->[ $number - 1 ] = 1;
746 114         291 return;
747             }
748              
749             1;
750              
751             __END__
752              
753             #-----------------------------------------------------------------------------
754              
755             =pod
756              
757             =for stopwords refactored
758              
759             =head1 NAME
760              
761             Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
762              
763              
764             =head1 AFFILIATION
765              
766             This Policy is part of the core L<Perl::Critic|Perl::Critic>
767             distribution.
768              
769              
770             =head1 DESCRIPTION
771              
772             Perl regular expressions have multiple types of grouping syntax. The
773             basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
774             C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/> are useful because
775             they have better runtime performance and do not copy strings to the
776             magic global capture variables.
777              
778             It's also easier on the maintenance programmer if you consistently use
779             capturing vs. non-capturing groups, because that programmer can tell
780             more easily which regexps can be refactored without breaking
781             surrounding code which may use the captured values.
782              
783              
784             =head1 CONFIGURATION
785              
786             This Policy is not configurable except for the standard options.
787              
788              
789             =head1 CAVEATS
790              
791             =head2 C<qr//> interpolation
792              
793             This policy can be confused by interpolation of C<qr//> elements, but
794             those are always false negatives. For example:
795              
796             my $foo_re = qr/(foo)/;
797             my ($foo) = m/$foo_re (bar)/x;
798              
799             A human can tell that this should be a violation because there are two
800             captures but only the first capture is used, not the second. The
801             policy only notices that there is one capture in the regexp and
802             remains happy.
803              
804             =head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
805              
806             This policy will only recognize capture groups referred to by these
807             variables if the use is subscripted by a literal integer.
808              
809             =head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
810              
811             This policy will not recognize capture groups referred to only by these
812             variables, because there is in general no way by static analysis to
813             determine which capture group is referred to. For example,
814              
815             m/ (?: (A[[:alpha:]]+) | (N\d+) ) (?{$foo=$^N}) /smx
816              
817             makes use of the first capture group if it matches, or the second
818             capture group if the first does not match but the second does.
819              
820             =head2 split()
821              
822             Normally, this policy thinks that if a capture is used at all it must be
823             used before the next regular expression in the same scope. The regular
824             expression in a C<split()> needs to be exempted because it does not
825             affect the caller's capture variables.
826              
827             At present, this policy recognizes and exempts the regular expressions
828             in
829              
830             split /.../, ...
831              
832             and
833              
834             split( /.../, ... )
835              
836             but more exotic syntax may produce false positives.
837              
838              
839             =head1 CREDITS
840              
841             Initial development of this policy was supported by a grant from the
842             Perl Foundation.
843              
844              
845             =head1 AUTHOR
846              
847             Chris Dolan <cdolan@cpan.org>
848              
849              
850             =head1 COPYRIGHT
851              
852             Copyright (c) 2007-2023 Chris Dolan. Many rights reserved.
853              
854             This program is free software; you can redistribute it and/or modify
855             it under the same terms as Perl itself. The full text of this license
856             can be found in the LICENSE file included with this module
857              
858             =cut
859              
860             # Local Variables:
861             # mode: cperl
862             # cperl-indent-level: 4
863             # fill-column: 78
864             # indent-tabs-mode: nil
865             # c-indentation-style: bsd
866             # End:
867             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :