File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm

Criterion	Covered	Total	%
statement	284	289	98.2
branch	177	216	81.9
condition	55	71	77.4
subroutine	45	46	97.8
pod	4	5	80.0
total	565	627	90.1

line	stmt	bran	cond	sub	pod	time	code
1							package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2
3	40			40		31357	use 5.010001;
	40					203
4	40			40		273	use strict;
	40					133
	40					827
5	40			40		244	use warnings;
	40					137
	40					1081
6
7	40			40		271	use Carp;
	40					121
	40					2649
8	40			40		333	use English qw(-no_match_vars);
	40					105
	40					283
9	40			40		15083	use List::SomeUtils qw(none);
	40					144
	40					2122
10	40			40		314	use Readonly;
	40					129
	40					2181
11	40			40		316	use Scalar::Util qw(refaddr);
	40					142
	40					2412
12
13	40			40		342	use Perl::Critic::Exception::Fatal::Internal qw{ throw_internal };
	40					129
	40					2631
14	40					2090	use Perl::Critic::Utils qw{
15							:booleans :characters :severities hashify precedence_of
16							split_nodes_on_comma
17	40			40		349	};
	40					145
18	40			40		14330	use parent 'Perl::Critic::Policy';
	40					160
	40					283
19
20							our $VERSION = '1.148';
21
22							#-----------------------------------------------------------------------------
23
24							Readonly::Scalar my $SPLIT => q{split};
25							Readonly::Scalar my $WHILE => q{while};
26
27							Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
28							hashify( qw< ${^CAPTURE} > );
29							# TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
30							# as a use of capture variable n.
31							Readonly::Hash my %CAPTURE_REFERENCE => (
32							hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
33							%ZERO_BASED_CAPTURE_REFERENCE );
34							Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
35							hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
36							%CAPTURE_REFERENCE );
37							Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
38							Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
39							hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
40							%CAPTURE_ARRAY );
41
42							Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
43							Readonly::Scalar my $EXPL => [252];
44
45							#-----------------------------------------------------------------------------
46
47	140			140	0	1868	sub supported_parameters { return qw() }
48	99			99	1	427	sub default_severity { return $SEVERITY_MEDIUM }
49	86			86	1	338	sub default_themes { return qw( core pbp maintenance ) }
50							sub applies_to {
51	81			81	1	265	return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >
52							}
53
54							#-----------------------------------------------------------------------------
55
56							Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
57
58							sub violates {
59	145			145	1	352	my ( $self, $elem, $doc ) = @_;
60
61							# optimization: don't bother parsing the regexp if there are no parens
62	145	100				493	return if 0 > index $elem->content(), '(';
63
64	140	50				1026	my $re = $doc->ppix_regexp_from_element( $elem ) or return;
65	140	100				907996	$re->failures() and return;
66
67	138	100				1091	my $ncaptures = $re->max_capture_number() or return;
68
69	137					4209	my @captures = ( undef ) x $ncaptures; # List of expected captures
70
71	137					300	my %named_captures; # List of expected named captures.
72							# Unlike the numbered capture logic, %named_captures
73							# entries are made undefined when a use of the name is
74							# found. Otherwise two hashes would be needed, one to
75							# become defined when a use is found, and one to hold
76							# the mapping of name to number.
77	137	100				235	foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
	137					380
78							) \|\| [] } ) {
79							# There can be more than one capture with the same name, so we need to
80							# record all of them. There will be duplications if the 'branch reset'
81							# "(?\| ... )" pattern is used, but this is benign given how numbered
82							# captures are recorded.
83	16		100			4678	push @{ $named_captures{ $struct->name() } \|\|= [] }, $struct->number();
	16					48
84							}
85
86							# Look for references to the capture in the regex itself
87	137	100				40998	return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
88
89	120	100	100			569	if ( $re->modifier_asserted( 'g' )
90							and not _check_if_in_while_condition_or_block( $elem ) ) {
91	14					39	$ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
92	14					73	$#captures = $ncaptures - 1;
93							}
94
95	120	100	100			2558	return if _enough_assignments($elem, \@captures) && !%named_captures;
96	87	100	100			325	return if _is_in_slurpy_array_context($elem) && !%named_captures;
97	71	100				309	return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
98
99	25					165	return $self->violation( $DESC, $EXPL, $elem );
100							}
101
102							# Find uses of both numbered and named capture variables in the regexp itself.
103							# Return true if all are used.
104							sub _enough_uses_in_regexp {
105	137			137		352	my ( $re, $captures, $named_captures, $doc ) = @_;
106
107							# Look for references to the capture in the regex itself. Note that this
108							# will also find backreferences in the replacement string of s///.
109	137	100				239	foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
	137					364
110							\|\| [] } ) {
111	2	50				642	if ( $token->is_named() ) {
112	0					0	_record_named_capture( $token->name(), $captures, $named_captures );
113							} else {
114	2					17	_record_numbered_capture( $token->absolute(), $captures );
115							}
116							}
117
118	137	100				42577	foreach my $token ( @{ $re->find(
	137					335
119							'PPIx::Regexp::Token::Code' ) \|\| [] } ) {
120	22	50				7911	my $ppi = $token->ppi() or next;
121	22					342	_check_node_children( $ppi, {
122							regexp => $re,
123							numbered_captures => $captures,
124							named_captures => $named_captures,
125							document => $doc,
126							}, _make_regexp_checker() );
127							}
128
129	138			138		716	return ( none {not defined} @{$captures} )
130							&& ( !%{$named_captures} \|\|
131	137		66	3		35726	none {defined} values %{$named_captures} );
	3					26
132							}
133
134							sub _enough_assignments {
135	120			120		286	my ($elem, $captures) = @_;
136
137							# look backward for the assignment operator
138	120					412	my $psib = $elem->sprevious_sibling;
139							SIBLING:
140	120					3524	while (1) {
141	195	100				2385	return if !$psib;
142	116	100				460	if ($psib->isa('PPI::Token::Operator')) {
143	70	100				202	last SIBLING if q{=} eq $psib->content;
144	32	100				220	return if q{!~} eq $psib->content;
145							}
146	75					288	$psib = $psib->sprevious_sibling;
147							}
148
149	38					326	$psib = $psib->sprevious_sibling;
150	38	50				1020	return if !$psib; # syntax error: '=' at the beginning of a statement???
151
152	38	100				153	if ($psib->isa('PPI::Token::Symbol')) {
		100
		50
153							# @foo = m/(foo)/
154							# @$foo = m/(foo)/
155							# %foo = m/(foo)/
156							# %$foo = m/(foo)/
157	13	50				52	return $TRUE if _symbol_is_slurpy($psib);
158
159							} elsif ($psib->isa('PPI::Structure::Block')) {
160							# @{$foo} = m/(foo)/
161							# %{$foo} = m/(foo)/
162	2	50				8	return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
163
164							} elsif ($psib->isa('PPI::Structure::List')) {
165							# () = m/(foo)/
166							# ($foo) = m/(foo)/
167							# ($foo,$bar) = m/(foo)(bar)/
168							# (@foo) = m/(foo)(bar)/
169							# ($foo,@foo) = m/(foo)(bar)/
170							# ($foo,@$foo) = m/(foo)(bar)/
171							# ($foo,@{$foo}) = m/(foo)(bar)/
172
173	23					513	my @args = $psib->schildren;
174	23	100				352	return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
175
176							# Forward looking: PPI might change in v1.200 so schild(0) is a
177							# PPI::Statement::Expression.
178	21	50	33			141	if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
179	21					71	@args = $args[0]->schildren;
180							}
181
182	21					296	my @parts = split_nodes_on_comma(@args);
183							PART:
184	21					90	for my $i (0 .. $#parts) {
185	33	100				64	if (1 == @{$parts[$i]}) {
	33					103
186	29					58	my $var = $parts[$i]->[0];
187	29	100	66			126	if ($var->isa('PPI::Token::Symbol') \|\| $var->isa('PPI::Token::Cast')) {
188	24	100				63	return $TRUE if _has_array_sigil($var);
189							}
190							}
191	28					197	_record_numbered_capture( $i + 1, $captures );
192							# ith variable capture
193							}
194							}
195
196	16			27		84	return none {not defined} @{$captures};
	27					152
	16					68
197							}
198
199							sub _symbol_is_slurpy {
200	13			13		44	my ($symbol) = @_;
201
202	13	100				34	return $TRUE if _has_array_sigil($symbol);
203	3	100				31	return $TRUE if _has_hash_sigil($symbol);
204	2	50				17	return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
205	0					0	return;
206							}
207
208							sub _has_array_sigil {
209	37			37		77	my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
210
211	37					105	return q{@} eq substr $elem->content, 0, 1;
212							}
213
214							sub _has_hash_sigil {
215	3			3		10	my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
216
217	3					14	return q{%} eq substr $elem->content, 0, 1;
218							}
219
220							sub _is_preceded_by_array_or_hash_cast {
221	4			4		9	my ($elem) = @_;
222	4					13	my $psib = $elem->sprevious_sibling;
223	4					75	my $cast;
224	4		66			33	while ($psib && $psib->isa('PPI::Token::Cast')) {
225	4					10	$cast = $psib;
226	4					20	$psib = $psib->sprevious_sibling;
227							}
228	4	50				81	return if !$cast;
229	4					14	my $sigil = substr $cast->content, 0, 1;
230	4		66			63	return q{@} eq $sigil \|\| q{%} eq $sigil;
231							}
232
233							sub _is_in_slurpy_array_context {
234	87			87		209	my ($elem) = @_;
235
236							# return true is the result of the regexp is passed to a subroutine.
237							# doesn't check for array context due to assignment.
238
239							# look backward for explicit regex operator
240	87					239	my $psib = $elem->sprevious_sibling;
241	87	100	100			1904	if ($psib && $psib->content eq q{=~}) {
242							# Track back through value
243	2					21	$psib = _skip_lhs($psib);
244							}
245
246	87	100				527	if (!$psib) {
247	60					205	my $parent = $elem->parent;
248	60	50				377	return if !$parent;
249	60	50				244	if ($parent->isa('PPI::Statement')) {
250	60					160	$parent = $parent->parent;
251	60	50				345	return if !$parent;
252							}
253
254							# Return true if we have a list that isn't part of a foreach loop.
255							# TECHNICAL DEBT: This code is basically shared with
256							# RequireCheckingReturnValueOfEval. I don't want to put this code
257							# into Perl::Critic::Utils::*, but I don't have time to sort out
258							# PPIx::Utilities::Structure::List yet.
259	60	100				327	if ( $parent->isa('PPI::Structure::List') ) {
260	5	50				67	my $parent_statement = $parent->statement() or return $TRUE;
261	5	100				170	return $TRUE if not
262							$parent_statement->isa('PPI::Statement::Compound');
263	2	50				9	return $TRUE if $parent_statement->type() ne 'foreach';
264							}
265
266	57	100				438	return $TRUE if $parent->isa('PPI::Structure::Constructor');
267	55	100				238	if ($parent->isa('PPI::Structure::Block')) {
268	5	100				25	return $TRUE
269							if
270							refaddr($elem->statement)
271							eq refaddr([$parent->schildren]->[-1]);
272							}
273	53					341	return;
274							}
275	27	100				115	if ($psib->isa('PPI::Token::Operator')) {
276							# Most operators kill slurpiness (except assignment, which is handled elsewhere).
277	19					57	return q{,} eq $psib->content;
278							}
279	8					86	return $TRUE;
280							}
281
282							sub _skip_lhs {
283	2			2		8	my ($elem) = @_;
284
285							# TODO: better implementation to handle casts, expressions, subcalls, etc.
286	2					10	$elem = $elem->sprevious_sibling();
287
288	2					58	return $elem;
289							}
290
291							sub _enough_magic {
292	71			71		181	my ($elem, $re, $captures, $named_captures, $doc) = @_;
293
294	71					242	_check_for_magic($elem, $re, $captures, $named_captures, $doc);
295
296	85			85		331	return ( none {not defined} @{$captures} )
297							&& ( !%{$named_captures} \|\|
298	71		66	6		1194	none {defined} values %{$named_captures} );
	6					58
299							}
300
301							# void return
302							sub _check_for_magic {
303	71			71		177	my ($elem, $re, $captures, $named_captures, $doc) = @_;
304
305							# Search for $1..$9 in :
306							# * the rest of this statement
307							# * subsequent sibling statements
308							# * if this is in a conditional boolean, the if/else bodies of the conditional
309							# * if this is in a while/for condition, the loop body
310							# But NO intervening regexps!
311
312							# Package up the usual arguments for _check_rest_of_statement().
313	71					312	my $arg = {
314							regexp => $re,
315							numbered_captures => $captures,
316							named_captures => $named_captures,
317							document => $doc,
318							};
319
320							# Capture whether or not the regular expression is negated -- that
321							# is, whether it is preceded by the '!~' binding operator.
322	71	100				237	if ( my $prior_token = $elem->sprevious_sibling() ) {
323	18		100			525	$arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
324							q<!~> eq $prior_token->content();
325							}
326
327	71	100				1158	return if ! _check_rest_of_statement( $elem, $arg );
328
329	66					226	my $parent = $elem->parent();
330	66		100			642	while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
331	165	100				1006	return if ! _check_rest_of_statement( $parent, $arg );
332	131					420	$parent = $parent->parent();
333							}
334
335	32					269	return;
336							}
337
338							# Check if we are in the condition or block of a 'while'
339							sub _check_if_in_while_condition_or_block {
340	17			17		497	my ( $elem ) = @_;
341	17	50				105	$elem or return;
342
343	17	50				117	my $parent = $elem->parent() or return;
344	17	50				223	$parent->isa( 'PPI::Statement' ) or return;
345
346	17	50				64	my $item = $parent = $parent->parent() or return;
347	17	100				161	if ( $item->isa( 'PPI::Structure::Block' ) ) {
348	1	50				5	$item = $item->sprevious_sibling() or return;
349							}
350	17	100				165	$item->isa( 'PPI::Structure::Condition' ) or return;
351
352	3	50				16	$item = $item->sprevious_sibling() or return;
353	3	50				112	$item->isa( 'PPI::Token::Word' ) or return;
354
355	3					15	return $WHILE eq $item->content();
356							}
357
358							{
359							# Shortcut operators '\|\|', '//', and 'or' can cause everything after
360							# them to be skipped. 'and' trumps '\|\|' and '//', and causes things
361							# to be evaluated again. The value is true to skip, false to cancel
362							# skipping.
363							Readonly::Hash my %SHORTCUT_OPERATOR => (
364							q<\|\|> => $FALSE,
365							q<//> => $FALSE,
366							and => $TRUE,
367							or => $FALSE,
368							);
369
370							# RT #38942
371							# The issue in the ticket is that in something like
372							# if ( /(a)/ \|\| /(b)/ ) {
373							# say $1
374							# }
375							# the capture variable can come from either /(a)/ or /(b)/. If we
376							# don't take into account the short-cutting nature of the '\|\|' we
377							# erroneously conclude that the capture in /(a)/ is not used. So we
378							# need to skip every regular expression after an alternation.
379							#
380							# The trick is that we want to still mark magic variables, because
381							# of code like
382							# my $foo = $1 \|\| $2;
383							# so we can't just ignore everything after an alternation.
384							#
385							# To do all this correctly, we have to track precedence, and start
386							# paying attention again if an 'and' is found after a '\|\|'.
387
388							# Subroutine _make_regexp_checker() manufactures a snippet of code
389							# which is used to track regular expressions. It takes one optional
390							# argument, which is the snippet used to track the parent object's
391							# regular expressions.
392							#
393							# The snippet is passed each token encountered, and returns true if
394							# the scan for capture variables is to be stopped. This will happen
395							# if the token is a regular expression which is _not_ to the right
396							# of an alternation operator ('\|\|', '//', or 'or'), or it _is_ to
397							# the right of an 'and', without an intervening alternation
398							# operator.
399							#
400							# If _make_regexp_checker() was passed a snippet which
401							# returns false on encountering a regular expression, the returned
402							# snippet always returns false, for the benefit of code like
403							# /(a)/ \|\| ( /(b)/ \|\| /(c)/ ).
404
405							sub _make_regexp_checker {
406	596			596		1086	my ( $parent ) = @_;
407
408							$parent
409							and not $parent->()
410	596	50	66	0		1465	and return sub { return $FALSE };
	0					0
411
412	596					1049	my $check = $TRUE;
413	596					871	my $precedence = 0;
414
415							return sub {
416	1157			1157		2066	my ( $elem ) = @_;
417
418	1157	100				3458	$elem or return $check;
419
420	819	100				2890	if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
421	48	100				134	return _regexp_is_in_split( $elem ) ? $FALSE : $check;
422							}
423
424	771	100	66			2589	if ( $elem->isa( 'PPI::Token::Structure' )
425							&& q<;> eq $elem->content() ) {
426	87					528	$check = $TRUE;
427	87					140	$precedence = 0;
428	87					307	return $FALSE;
429							}
430
431	684	100				2674	$elem->isa( 'PPI::Token::Operator' )
432							or return $FALSE;
433
434	83					237	my $content = $elem->content();
435	83	100				565	defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
436							or return $FALSE;
437
438	21					305	my $oper_precedence = precedence_of( $content );
439	21	100				186	$oper_precedence >= $precedence
440							or return $FALSE;
441
442	20					49	$precedence = $oper_precedence;
443	20					38	$check = $oper_check;
444
445	20					62	return $FALSE;
446	596					2537	};
447							}
448							}
449
450							# Argument is regexp.
451							# True if it is the regexp in a split()
452							sub _regexp_is_in_split {
453	48			48		138	my ( $elem ) = @_;
454
455	48					85	my $prev;
456	48	100				129	if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
457							# Maybe we have split( /.../, ... )
458	32	50				758	my $stmt = $elem->statement()
459							or return $FALSE;
460	32	50				669	my $list = $stmt->parent()
461							or return $FALSE;
462	32	50				254	$prev = $elem->sprevious_sibling()
463							or return $FALSE;
464							}
465	16		100			617	return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
466							}
467
468
469							# false if we hit another regexp
470							# The arguments are:
471							# $elem - The PPI::Element whose siblings are to be checked;
472							# $arg - A hash reference containing the following keys:
473							# regexp => the relevant PPIx::Regexp object;
474							# numbered_captures => a reference to the array used to track the
475							# use of numbered captures;
476							# named_captures => a reference to the hash used to track the
477							# use of named captures;
478							# negated => true if the regexp was bound to its target with the
479							# '!~' operator;
480							# document => a reference to the Perl::Critic::Document;
481							# Converted to passing the arguments everyone gets in a hash because of
482							# the need to add the 'negated' argument, which would put us at six
483							# arguments.
484							sub _check_rest_of_statement {
485	236			236		514	my ( $elem, $arg ) = @_;
486
487	236					464	my $checker = _make_regexp_checker();
488	236					763	my $nsib = $elem->snext_sibling;
489
490							# If we are an if (or elsif) and the result of the regexp is
491							# negated, we skip the first block found. RT #69867
492	236	100	100			5226	if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
493	1		33			24	while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
494	0					0	$nsib = $nsib->snext_sibling();
495							}
496	1	50				7	$nsib and $nsib = $nsib->snext_sibling();
497							}
498
499	236					686	while ($nsib) {
500	203	100				2744	return if $checker->($nsib);
501	198	100				821	if ($nsib->isa('PPI::Node')) {
502	88	100				286	return if ! _check_node_children($nsib, $arg, $checker );
503							} else {
504							_mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
505	110					359	$arg->{named_captures}, $arg->{document} );
506							}
507	164					1305	$nsib = $nsib->snext_sibling;
508							}
509	197					3313	return $TRUE;
510							}
511
512							{
513
514							Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
515
516							# Return true if the argument is the condition of an if or elsif
517							# statement, otherwise return false.
518							sub _is_condition_of_if_statement {
519	11			11		25	my ( $elem ) = @_;
520	11	100	66			87	$elem
521							and $elem->isa( 'PPI::Structure::Condition' )
522							or return $FALSE;
523	1	50				6	my $psib = $elem->sprevious_sibling()
524							or return $FALSE;
525	1	50				28	$psib->isa( 'PPI::Token::Word' )
526							or return $FALSE;
527	1					3	return $IS_IF_STATEMENT{ $psib->content() };
528
529							}
530							}
531
532							# false if we hit another regexp
533							# The arguments are:
534							# $elem - The PPI::Node whose children are to be checked;
535							# $arg - A hash reference containing the following keys:
536							# regexp => the relevant PPIx::Regexp object;
537							# numbered_captures => a reference to the array used to track the
538							# use of numbered captures;
539							# named_captures => a reference to the hash used to track the
540							# use of named captures;
541							# document => a reference to the Perl::Critic::Document;
542							# $parent_checker - The parent's regexp checking code snippet,
543							# manufactured by _make_regexp_checker(). This argument is not in
544							# the $arg hash because that hash is shared among levels of the
545							# parse tree, whereas the regexp checker is not.
546							# TODO the things in the $arg hash are widely shared among the various
547							# pieces/parts of this policy; maybe more subroutines should use this
548							# hash rather than passing all this stuff around as individual
549							# arguments. This particular subroutine got the hash-reference treatment
550							# because Subroutines::ProhibitManyArgs started complaining when the
551							# checker argument was added.
552							sub _check_node_children {
553	338			338		665	my ($elem, $arg, $parent_checker) = @_;
554
555							# caveat: this will descend into subroutine definitions...
556
557	338					677	my $checker = _make_regexp_checker($parent_checker);
558	338					1108	for my $child ($elem->schildren) {
559	616	100				5826	return if $checker->($child);
560	582	100				2348	if ($child->isa('PPI::Node')) {
561	228	100				749	return if ! _check_node_children($child, $arg, $checker);
562							} else {
563							_mark_magic($child, $arg->{regexp},
564							$arg->{numbered_captures}, $arg->{named_captures},
565	354					1049	$arg->{document});
566							}
567							}
568	255					2239	return $TRUE;
569							}
570
571							sub _mark_magic {
572	464			464		1000	my ($elem, $re, $captures, $named_captures, $doc) = @_;
573
574							# If we're a double-quotish element, we need to grub through its
575							# content. RT #38942
576	464	100				907	if ( _is_double_quotish_element( $elem ) ) {
577	14					64	_mark_magic_in_content(
578							$elem->content(), $re, $captures, $named_captures, $doc );
579	14					38	return;
580							}
581
582							# Ditto a here document, though the logic is different. RT #38942
583	450	100				1446	if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
584	3	50				12	$elem->content() =~ m/ \A << ~? \s* ' /sxm
585							or _mark_magic_in_content(
586							join( $EMPTY, $elem->heredoc() ), $re, $captures,
587							$named_captures, $doc );
588	3					10	return;
589							}
590
591							# Only interested in magic, or known English equivalent.
592	447					1027	my $content = $elem->content();
593	447	100				2252	my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
594							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
595							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
596							$elem->isa( 'PPI::Token::Magic' )
597	447	100	100			2847	or $capture_ref->{$content}
598							or return;
599
600	81	100				601	if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
		100
		100
601
602							# Record if we see $1, $2, $3, ...
603	43					153	my $num = $1;
604	43	100				122	if (0 < $num) { # don't mark $0
605							# Only mark the captures we really need -- don't mark superfluous magic vars
606	42	100				80	if ($num <= @{$captures}) {
	42					120
607	41					138	_record_numbered_capture( $num, $captures );
608							}
609							}
610							} elsif ( $capture_array->{$content} ) { # GitHub #778
611	1					11	foreach my $num ( 1 .. @{$captures} ) {
	1					6
612	1					5	_record_numbered_capture( $num, $captures );
613							}
614							} elsif ( $capture_ref->{$content} ) {
615	30					452	_mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
616							}
617	81					317	return;
618							}
619
620							# Record a named capture referenced by a hash or array found in code.
621							# The arguments are:
622							# $elem - The element that represents a subscripted capture variable;
623							# $re - The PPIx::Regexp object;
624							# $captures - A reference to the numbered capture array;
625							# $named_captures - A reference to the named capture hash.
626							sub _mark_magic_subscripted_code {
627	30			30		105	my ( $elem, $re, $captures, $named_captures ) = @_;
628	30	50				103	my $subscr = $elem->snext_sibling() or return;
629	30	50				983	$subscr->isa( 'PPI::Structure::Subscript' ) or return;
630	30					104	my $subval = $subscr->content();
631	30					1115	_record_subscripted_capture(
632							$elem->content(), $subval, $re, $captures, $named_captures );
633	30					74	return;
634							}
635
636							# Find capture variables in the content of a double-quotish thing, and
637							# record their use. RT #38942. The arguments are:
638							# $content - The content() ( or heredoc() in the case of a here
639							# document) to be analyzed;
640							# $re - The PPIx::Regexp object;
641							# $captures - A reference to the numbered capture array;
642							# $named_captures - A reference to the named capture hash.
643							sub _mark_magic_in_content {
644	17			17		151	my ( $content, $re, $captures, $named_captures, $doc ) = @_;
645
646	17	100				71	my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
647							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
648							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
649
650	17					134	while ( $content =~ m< ( [\$\@] (?:
651							[{] \^? (?: \w+ \| . ) [}] \| \w+ \| . ) ) >sxmg ) {
652	10					41	my $name = $1;
653	10	100				43	$name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
654							and $name =~ s/ [}] \z //sxm;
655
656	10	100	33			49	if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
		100
		50
657
658	5					11	my $num = $1;
659							0 < $num
660	5	50	33			21	and $num <= @{ $captures }
	5					24
661							and _record_numbered_capture( $num, $captures );
662
663							} elsif ( $capture_array->{$name} ) { # GitHub #778
664	1					10	foreach my $num ( 1 .. @{$captures} ) {
	1					4
665	1					5	_record_numbered_capture( $num, $captures );
666							}
667
668							} elsif ( $capture_ref->{$name} &&
669							$content =~ m/ \G ( [{] [^}]+ [}] \| [[] [^]] []] ) /smxgc )
670							{
671	4					129	_record_subscripted_capture(
672							$name, $1, $re, $captures, $named_captures );
673
674							}
675							}
676	17					43	return;
677							}
678
679							# Return true if the given element is double-quotish. Always returns
680							# false for a PPI::Token::HereDoc, since they're a different beast.
681							# RT #38942.
682							sub _is_double_quotish_element {
683	464			464		847	my ( $elem ) = @_;
684
685	464	50				1261	$elem or return;
686
687	464					1224	my $content = $elem->content();
688
689	464	50				2777	if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
690	0					0	return $content !~ m/ \A qx \s* ' /sxm;
691							}
692
693	464					952	foreach my $class ( qw{
694							PPI::Token::Quote::Double
695							PPI::Token::Quote::Interpolate
696							PPI::Token::QuoteLike::Backtick
697							PPI::Token::QuoteLike::Readline
698							} ) {
699	1814	100				5541	$elem->isa( $class ) and return $TRUE;
700							}
701
702	450					1175	return $FALSE;
703							}
704
705							# Record a subscripted capture, either hash dereference or array
706							# dereference. We assume that an array represents a numbered capture and
707							# a hash represents a named capture, since we have to handle (e.g.) both
708							# @+ and %+.
709							sub _record_subscripted_capture {
710	34			34		176	my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
711	34	100				260	if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
		50
712	8					42	( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
713	8					32	_record_named_capture( $name, $captures, $named_captures );
714							} elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
715							# GitHub #778
716							# Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
717							# But @{^CAPTURE} is zero-based, so we need to tweak the subscript
718							# before we record the capture number.
719	26					131	my $num = $1 + 0;
720							$num >= 0
721	26	100	100			147	and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
722							and $num++;
723	26					206	_record_numbered_capture( $num, $captures, $re );
724							}
725	34					87	return;
726							}
727
728							# Because a named capture is also one or more numbered captures, the recording
729							# of the use of a named capture seemed complex enough to wrap in a subroutine.
730							sub _record_named_capture {
731	8			8		23	my ( $name, $captures, $named_captures ) = @_;
732	8	50				33	defined ( my $numbers = $named_captures->{$name} ) or return;
733	8					19	foreach my $capnum ( @{ $numbers } ) {
	8					22
734	10					26	_record_numbered_capture( $capnum, $captures );
735							}
736	8					28	$named_captures->{$name} = undef;
737	8					22	return;
738							}
739
740							sub _record_numbered_capture {
741	114			114		302	my ( $number, $captures, $re ) = @_;
742	114	100	100			371	$re and $number < 0
743							and $number = $re->max_capture_number() + $number + 1;
744	114	50				539	return if $number <= 0;
745	114					305	$captures->[ $number - 1 ] = 1;
746	114					291	return;
747							}
748
749							1;
750
751							__END__
752
753							#-----------------------------------------------------------------------------
754
755							=pod
756
757							=for stopwords refactored
758
759							=head1 NAME
760
761							Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
762
763
764							=head1 AFFILIATION
765
766							This Policy is part of the core L<Perl::Critic\|Perl::Critic>
767							distribution.
768
769
770							=head1 DESCRIPTION
771
772							Perl regular expressions have multiple types of grouping syntax. The
773							basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
774							C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/> are useful because
775							they have better runtime performance and do not copy strings to the
776							magic global capture variables.
777
778							It's also easier on the maintenance programmer if you consistently use
779							capturing vs. non-capturing groups, because that programmer can tell
780							more easily which regexps can be refactored without breaking
781							surrounding code which may use the captured values.
782
783
784							=head1 CONFIGURATION
785
786							This Policy is not configurable except for the standard options.
787
788
789							=head1 CAVEATS
790
791							=head2 C<qr//> interpolation
792
793							This policy can be confused by interpolation of C<qr//> elements, but
794							those are always false negatives. For example:
795
796							my $foo_re = qr/(foo)/;
797							my ($foo) = m/$foo_re (bar)/x;
798
799							A human can tell that this should be a violation because there are two
800							captures but only the first capture is used, not the second. The
801							policy only notices that there is one capture in the regexp and
802							remains happy.
803
804							=head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
805
806							This policy will only recognize capture groups referred to by these
807							variables if the use is subscripted by a literal integer.
808
809							=head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
810
811							This policy will not recognize capture groups referred to only by these
812							variables, because there is in general no way by static analysis to
813							determine which capture group is referred to. For example,
814
815							m/ (?: (A[[:alpha:]]+) \| (N\d+) ) (?{$foo=$^N}) /smx
816
817							makes use of the first capture group if it matches, or the second
818							capture group if the first does not match but the second does.
819
820							=head2 split()
821
822							Normally, this policy thinks that if a capture is used at all it must be
823							used before the next regular expression in the same scope. The regular
824							expression in a C<split()> needs to be exempted because it does not
825							affect the caller's capture variables.
826
827							At present, this policy recognizes and exempts the regular expressions
828							in
829
830							split /.../, ...
831
832							and
833
834							split( /.../, ... )
835
836							but more exotic syntax may produce false positives.
837
838
839							=head1 CREDITS
840
841							Initial development of this policy was supported by a grant from the
842							Perl Foundation.
843
844
845							=head1 AUTHOR
846
847							Chris Dolan <cdolan@cpan.org>
848
849
850							=head1 COPYRIGHT
851
852							Copyright (c) 2007-2023 Chris Dolan. Many rights reserved.
853
854							This program is free software; you can redistribute it and/or modify
855							it under the same terms as Perl itself. The full text of this license
856							can be found in the LICENSE file included with this module
857
858							=cut
859
860							# Local Variables:
861							# mode: cperl
862							# cperl-indent-level: 4
863							# fill-column: 78
864							# indent-tabs-mode: nil
865							# c-indentation-style: bsd
866							# End:
867							# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :