File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitUnusedCapture.pm

Criterion	Covered	Total	%
statement	285	290	98.2
branch	179	218	82.1
condition	55	71	77.4
subroutine	45	46	97.8
pod	4	5	80.0
total	568	630	90.1

line	stmt	bran	cond	sub	pod	time	code
1							package Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture;
2
3	40			40		31093	use 5.010001;
	40					199
4	40			40		265	use strict;
	40					128
	40					830
5	40			40		246	use warnings;
	40					109
	40					1005
6
7	40			40		237	use Carp;
	40					131
	40					2567
8	40			40		348	use English qw(-no_match_vars);
	40					112
	40					260
9	40			40		15032	use List::SomeUtils qw(none);
	40					163
	40					2085
10	40			40		366	use Readonly;
	40					128
	40					2141
11	40			40		357	use Scalar::Util qw(refaddr);
	40					122
	40					2368
12
13	40			40		346	use Perl::Critic::Exception::Fatal::Internal qw{ throw_internal };
	40					148
	40					2654
14	40					2143	use Perl::Critic::Utils qw{
15							:booleans :characters :severities hashify precedence_of
16							split_nodes_on_comma
17	40			40		333	};
	40					148
18	40			40		14659	use parent 'Perl::Critic::Policy';
	40					160
	40					308
19
20							our $VERSION = '1.146';
21
22							#-----------------------------------------------------------------------------
23
24							Readonly::Scalar my $SPLIT => q{split};
25							Readonly::Scalar my $WHILE => q{while};
26
27							Readonly::Hash my %ZERO_BASED_CAPTURE_REFERENCE =>
28							hashify( qw< ${^CAPTURE} > );
29							# TODO: additional logic to prevent ${^CAPTURE_ALL}[n] from being recognized
30							# as a use of capture variable n.
31							Readonly::Hash my %CAPTURE_REFERENCE => (
32							hashify( qw< $+ $- ${^CAPTURE_ALL} > ),
33							%ZERO_BASED_CAPTURE_REFERENCE );
34							Readonly::Hash my %CAPTURE_REFERENCE_ENGLISH => (
35							hashify( qw{ $LAST_PAREN_MATCH $LAST_MATCH_START $LAST_MATCH_END } ),
36							%CAPTURE_REFERENCE );
37							Readonly::Hash my %CAPTURE_ARRAY => hashify( qw< @- @+ @{^CAPTURE} > );
38							Readonly::Hash my %CAPTURE_ARRAY_ENGLISH => (
39							hashify( qw< @LAST_MATCH_START @LAST_MATCH_END > ),
40							%CAPTURE_ARRAY );
41
42							Readonly::Scalar my $DESC => q{Only use a capturing group if you plan to use the captured value};
43							Readonly::Scalar my $EXPL => [252];
44
45							#-----------------------------------------------------------------------------
46
47	140			140	0	1809	sub supported_parameters { return qw() }
48	99			99	1	447	sub default_severity { return $SEVERITY_MEDIUM }
49	86			86	1	412	sub default_themes { return qw( core pbp maintenance ) }
50							sub applies_to {
51	81			81	1	292	return qw< PPI::Token::Regexp::Match PPI::Token::Regexp::Substitute >
52							}
53
54							#-----------------------------------------------------------------------------
55
56							Readonly::Scalar my $NUM_CAPTURES_FOR_GLOBAL => 100; # arbitrarily large number
57
58							sub violates {
59	145			145	1	367	my ( $self, $elem, $doc ) = @_;
60
61							# optimization: don't bother parsing the regexp if there are no parens
62	145	100				468	return if 0 > index $elem->content(), '(';
63
64	140	50				1155	my $re = $doc->ppix_regexp_from_element( $elem ) or return;
65	140	100				791700	$re->failures() and return;
66
67	138	100				1150	my $ncaptures = $re->max_capture_number() or return;
68
69	137					4220	my @captures = ( undef ) x $ncaptures; # List of expected captures
70
71	137					251	my %named_captures; # List of expected named captures.
72							# Unlike the numbered capture logic, %named_captures
73							# entries are made undefined when a use of the name is
74							# found. Otherwise two hashes would be needed, one to
75							# become defined when a use is found, and one to hold
76							# the mapping of name to number.
77	137	100				226	foreach my $struct ( @{ $re->find( 'PPIx::Regexp::Structure::NamedCapture'
	137					359
78							) \|\| [] } ) {
79							# There can be more than one capture with the same name, so we need to
80							# record all of them. There will be duplications if the 'branch reset'
81							# "(?\| ... )" pattern is used, but this is benign given how numbered
82							# captures are recorded.
83	16		100			4065	push @{ $named_captures{ $struct->name() } \|\|= [] }, $struct->number();
	16					60
84							}
85
86							# Look for references to the capture in the regex itself
87	137	100				34213	return if _enough_uses_in_regexp( $re, \@captures, \%named_captures, $doc );
88
89	120	100	100			532	if ( $re->modifier_asserted( 'g' )
90							and not _check_if_in_while_condition_or_block( $elem ) ) {
91	14					40	$ncaptures = $NUM_CAPTURES_FOR_GLOBAL;
92	14					64	$#captures = $ncaptures - 1;
93							}
94
95	120	100	100			2361	return if _enough_assignments($elem, \@captures) && !%named_captures;
96	87	100	100			350	return if _is_in_slurpy_array_context($elem) && !%named_captures;
97	71	100				241	return if _enough_magic($elem, $re, \@captures, \%named_captures, $doc);
98
99	25					153	return $self->violation( $DESC, $EXPL, $elem );
100							}
101
102							# Find uses of both numbered and named capture variables in the regexp itself.
103							# Return true if all are used.
104							sub _enough_uses_in_regexp {
105	137			137		368	my ( $re, $captures, $named_captures, $doc ) = @_;
106
107							# Look for references to the capture in the regex itself. Note that this
108							# will also find backreferences in the replacement string of s///.
109	137	100				226	foreach my $token ( @{ $re->find( 'PPIx::Regexp::Token::Reference' )
	137					406
110							\|\| [] } ) {
111	2	50				511	if ( $token->is_named() ) {
112	0					0	_record_named_capture( $token->name(), $captures, $named_captures );
113							} else {
114	2					13	_record_numbered_capture( $token->absolute(), $captures );
115							}
116							}
117
118	137	100				36625	foreach my $token ( @{ $re->find(
	137					361
119							'PPIx::Regexp::Token::Code' ) \|\| [] } ) {
120	22	50				6573	my $ppi = $token->ppi() or next;
121	22					350	_check_node_children( $ppi, {
122							regexp => $re,
123							numbered_captures => $captures,
124							named_captures => $named_captures,
125							document => $doc,
126							}, _make_regexp_checker() );
127							}
128
129	138			138		776	return ( none {not defined} @{$captures} )
130							&& ( !%{$named_captures} \|\|
131	137		66	3		30829	none {defined} values %{$named_captures} );
	3					33
132							}
133
134							sub _enough_assignments {
135	120			120		278	my ($elem, $captures) = @_;
136
137							# look backward for the assignment operator
138	120					588	my $psib = $elem->sprevious_sibling;
139							SIBLING:
140	120					3409	while (1) {
141	195	100				1935	return if !$psib;
142	116	100				382	if ($psib->isa('PPI::Token::Operator')) {
143	70	100				214	last SIBLING if q{=} eq $psib->content;
144	32	100				189	return if q{!~} eq $psib->content;
145							}
146	75					254	$psib = $psib->sprevious_sibling;
147							}
148
149	38					278	$psib = $psib->sprevious_sibling;
150	38	50				797	return if !$psib; # syntax error: '=' at the beginning of a statement???
151
152	38	100				170	if ($psib->isa('PPI::Token::Symbol')) {
		100
		50
153							# @foo = m/(foo)/
154							# @$foo = m/(foo)/
155							# %foo = m/(foo)/
156							# %$foo = m/(foo)/
157	13	50				55	return $TRUE if _symbol_is_slurpy($psib);
158
159							} elsif ($psib->isa('PPI::Structure::Block')) {
160							# @{$foo} = m/(foo)/
161							# %{$foo} = m/(foo)/
162	2	50				10	return $TRUE if _is_preceded_by_array_or_hash_cast($psib);
163
164							} elsif ($psib->isa('PPI::Structure::List')) {
165							# () = m/(foo)/
166							# ($foo) = m/(foo)/
167							# ($foo,$bar) = m/(foo)(bar)/
168							# (@foo) = m/(foo)(bar)/
169							# ($foo,@foo) = m/(foo)(bar)/
170							# ($foo,@$foo) = m/(foo)(bar)/
171							# ($foo,@{$foo}) = m/(foo)(bar)/
172
173	23					483	my @args = $psib->schildren;
174	23	100				272	return $TRUE if not @args; # empty list (perhaps the "goatse" operator) is slurpy
175
176							# Forward looking: PPI might change in v1.200 so schild(0) is a
177							# PPI::Statement::Expression.
178	21	50	33			158	if ( 1 == @args && $args[0]->isa('PPI::Statement::Expression') ) {
179	21					92	@args = $args[0]->schildren;
180							}
181
182	21					282	my @parts = split_nodes_on_comma(@args);
183							PART:
184	21					75	for my $i (0 .. $#parts) {
185	33	100				47	if (1 == @{$parts[$i]}) {
	33					88
186	29					45	my $var = $parts[$i]->[0];
187	29	100	66			124	if ($var->isa('PPI::Token::Symbol') \|\| $var->isa('PPI::Token::Cast')) {
188	24	100				61	return $TRUE if _has_array_sigil($var);
189							}
190							}
191	28					191	_record_numbered_capture( $i + 1, $captures );
192							# ith variable capture
193							}
194							}
195
196	16			27		58	return none {not defined} @{$captures};
	27					146
	16					63
197							}
198
199							sub _symbol_is_slurpy {
200	13			13		29	my ($symbol) = @_;
201
202	13	100				37	return $TRUE if _has_array_sigil($symbol);
203	3	100				28	return $TRUE if _has_hash_sigil($symbol);
204	2	50				18	return $TRUE if _is_preceded_by_array_or_hash_cast($symbol);
205	0					0	return;
206							}
207
208							sub _has_array_sigil {
209	37			37		74	my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
210
211	37					96	return q{@} eq substr $elem->content, 0, 1;
212							}
213
214							sub _has_hash_sigil {
215	3			3		11	my ($elem) = @_; # Works on PPI::Token::Symbol and ::Cast
216
217	3					9	return q{%} eq substr $elem->content, 0, 1;
218							}
219
220							sub _is_preceded_by_array_or_hash_cast {
221	4			4		10	my ($elem) = @_;
222	4					17	my $psib = $elem->sprevious_sibling;
223	4					67	my $cast;
224	4		66			52	while ($psib && $psib->isa('PPI::Token::Cast')) {
225	4					11	$cast = $psib;
226	4					20	$psib = $psib->sprevious_sibling;
227							}
228	4	50				72	return if !$cast;
229	4					15	my $sigil = substr $cast->content, 0, 1;
230	4		66			88	return q{@} eq $sigil \|\| q{%} eq $sigil;
231							}
232
233							sub _is_in_slurpy_array_context {
234	87			87		219	my ($elem) = @_;
235
236							# return true is the result of the regexp is passed to a subroutine.
237							# doesn't check for array context due to assignment.
238
239							# look backward for explicit regex operator
240	87					238	my $psib = $elem->sprevious_sibling;
241	87	100	100			1533	if ($psib && $psib->content eq q{=~}) {
242							# Track back through value
243	2					16	$psib = _skip_lhs($psib);
244							}
245
246	87	100				375	if (!$psib) {
247	60					232	my $parent = $elem->parent;
248	60	50				334	return if !$parent;
249	60	50				300	if ($parent->isa('PPI::Statement')) {
250	60					164	$parent = $parent->parent;
251	60	50				298	return if !$parent;
252							}
253
254							# Return true if we have a list that isn't part of a foreach loop.
255							# TECHNICAL DEBT: This code is basically shared with
256							# RequireCheckingReturnValueOfEval. I don't want to put this code
257							# into Perl::Critic::Utils::*, but I don't have time to sort out
258							# PPIx::Utilities::Structure::List yet.
259	60	100				311	if ( $parent->isa('PPI::Structure::List') ) {
260	5	50				46	my $parent_statement = $parent->statement() or return $TRUE;
261	5	100				135	return $TRUE if not
262							$parent_statement->isa('PPI::Statement::Compound');
263	2	50				9	return $TRUE if $parent_statement->type() ne 'foreach';
264							}
265
266	57	100				400	return $TRUE if $parent->isa('PPI::Structure::Constructor');
267	55	100				248	if ($parent->isa('PPI::Structure::Block')) {
268	5	100				18	return $TRUE
269							if
270							refaddr($elem->statement)
271							eq refaddr([$parent->schildren]->[-1]);
272							}
273	53					353	return;
274							}
275	27	100				135	if ($psib->isa('PPI::Token::Operator')) {
276							# most operators kill slurpiness (except assignment, which is handled elsewhere)
277	19	100				54	return $TRUE if q{,} eq $psib->content;
278	17					106	return;
279							}
280	8					61	return $TRUE;
281							}
282
283							sub _skip_lhs {
284	2			2		5	my ($elem) = @_;
285
286							# TODO: better implementation to handle casts, expressions, subcalls, etc.
287	2					8	$elem = $elem->sprevious_sibling();
288
289	2					46	return $elem;
290							}
291
292							sub _enough_magic {
293	71			71		171	my ($elem, $re, $captures, $named_captures, $doc) = @_;
294
295	71					233	_check_for_magic($elem, $re, $captures, $named_captures, $doc);
296
297	85			85		294	return ( none {not defined} @{$captures} )
298							&& ( !%{$named_captures} \|\|
299	71		66	6		1067	none {defined} values %{$named_captures} );
	6					65
300							}
301
302							# void return
303							sub _check_for_magic {
304	71			71		166	my ($elem, $re, $captures, $named_captures, $doc) = @_;
305
306							# Search for $1..$9 in :
307							# * the rest of this statement
308							# * subsequent sibling statements
309							# * if this is in a conditional boolean, the if/else bodies of the conditional
310							# * if this is in a while/for condition, the loop body
311							# But NO intervening regexps!
312
313							# Package up the usual arguments for _check_rest_of_statement().
314	71					289	my $arg = {
315							regexp => $re,
316							numbered_captures => $captures,
317							named_captures => $named_captures,
318							document => $doc,
319							};
320
321							# Capture whether or not the regular expression is negated -- that
322							# is, whether it is preceded by the '!~' binding operator.
323	71	100				213	if ( my $prior_token = $elem->sprevious_sibling() ) {
324	18		100			454	$arg->{negated} = $prior_token->isa( 'PPI::Token::Operator' ) &&
325							q<!~> eq $prior_token->content();
326							}
327
328	71	100				1074	return if ! _check_rest_of_statement( $elem, $arg );
329
330	66					229	my $parent = $elem->parent();
331	66		100			698	while ($parent && ! $parent->isa('PPI::Statement::Sub')) {
332	165	100				932	return if ! _check_rest_of_statement( $parent, $arg );
333	131					377	$parent = $parent->parent();
334							}
335
336	32					206	return;
337							}
338
339							# Check if we are in the condition or block of a 'while'
340							sub _check_if_in_while_condition_or_block {
341	17			17		396	my ( $elem ) = @_;
342	17	50				84	$elem or return;
343
344	17	50				87	my $parent = $elem->parent() or return;
345	17	50				194	$parent->isa( 'PPI::Statement' ) or return;
346
347	17	50				57	my $item = $parent = $parent->parent() or return;
348	17	100				158	if ( $item->isa( 'PPI::Structure::Block' ) ) {
349	1	50				6	$item = $item->sprevious_sibling() or return;
350							}
351	17	100				145	$item->isa( 'PPI::Structure::Condition' ) or return;
352
353	3	50				16	$item = $item->sprevious_sibling() or return;
354	3	50				130	$item->isa( 'PPI::Token::Word' ) or return;
355
356	3					20	return $WHILE eq $item->content();
357							}
358
359							{
360							# Shortcut operators '\|\|', '//', and 'or' can cause everything after
361							# them to be skipped. 'and' trumps '\|\|' and '//', and causes things
362							# to be evaluated again. The value is true to skip, false to cancel
363							# skipping.
364							Readonly::Hash my %SHORTCUT_OPERATOR => (
365							q<\|\|> => $FALSE,
366							q<//> => $FALSE,
367							and => $TRUE,
368							or => $FALSE,
369							);
370
371							# RT #38942
372							# The issue in the ticket is that in something like
373							# if ( /(a)/ \|\| /(b)/ ) {
374							# say $1
375							# }
376							# the capture variable can come from either /(a)/ or /(b)/. If we
377							# don't take into account the short-cutting nature of the '\|\|' we
378							# erroneously conclude that the capture in /(a)/ is not used. So we
379							# need to skip every regular expression after an alternation.
380							#
381							# The trick is that we want to still mark magic variables, because
382							# of code like
383							# my $foo = $1 \|\| $2;
384							# so we can't just ignore everything after an alternation.
385							#
386							# To do all this correctly, we have to track precedence, and start
387							# paying attention again if an 'and' is found after a '\|\|'.
388
389							# Subroutine _make_regexp_checker() manufactures a snippet of code
390							# which is used to track regular expressions. It takes one optional
391							# argument, which is the snippet used to track the parent object's
392							# regular expressions.
393							#
394							# The snippet is passed each token encountered, and returns true if
395							# the scan for capture variables is to be stopped. This will happen
396							# if the token is a regular expression which is _not_ to the right
397							# of an alternation operator ('\|\|', '//', or 'or'), or it _is_ to
398							# the right of an 'and', without an intervening alternation
399							# operator.
400							#
401							# If _make_regexp_checker() was passed a snippet which
402							# returns false on encountering a regular expression, the returned
403							# snippet always returns false, for the benefit of code like
404							# /(a)/ \|\| ( /(b)/ \|\| /(c)/ ).
405
406							sub _make_regexp_checker {
407	596			596		876	my ( $parent ) = @_;
408
409							$parent
410							and not $parent->()
411	596	50	66	0		1373	and return sub { return $FALSE };
	0					0
412
413	596					940	my $check = $TRUE;
414	596					779	my $precedence = 0;
415
416							return sub {
417	1157			1157		1737	my ( $elem ) = @_;
418
419	1157	100				2836	$elem or return $check;
420
421	819	100				2588	if ( $elem->isa( 'PPI::Token::Regexp' ) ) {
422	48	100				130	return _regexp_is_in_split( $elem ) ? $FALSE : $check;
423							}
424
425	771	100	66			2375	if ( $elem->isa( 'PPI::Token::Structure' )
426							&& q<;> eq $elem->content() ) {
427	87					463	$check = $TRUE;
428	87					155	$precedence = 0;
429	87					254	return $FALSE;
430							}
431
432	684	100				2245	$elem->isa( 'PPI::Token::Operator' )
433							or return $FALSE;
434
435	83					194	my $content = $elem->content();
436	83	100				547	defined( my $oper_check = $SHORTCUT_OPERATOR{$content} )
437							or return $FALSE;
438
439	21					242	my $oper_precedence = precedence_of( $content );
440	21	100				156	$oper_precedence >= $precedence
441							or return $FALSE;
442
443	20					34	$precedence = $oper_precedence;
444	20					41	$check = $oper_check;
445
446	20					59	return $FALSE;
447	596					2116	};
448							}
449							}
450
451							# Argument is regexp.
452							# True if it is the regexp in a split()
453							sub _regexp_is_in_split {
454	48			48		81	my ( $elem ) = @_;
455
456	48					104	my $prev;
457	48	100				116	if ( ! ( $prev = $elem->sprevious_sibling() ) ) {
458							# Maybe we have split( /.../, ... )
459	32	50				674	my $stmt = $elem->statement()
460							or return $FALSE;
461	32	50				584	my $list = $stmt->parent()
462							or return $FALSE;
463	32	50				220	$prev = $elem->sprevious_sibling()
464							or return $FALSE;
465							}
466	16		100			601	return $prev->isa( 'PPI::Token::Word' ) && $SPLIT eq $prev->content();
467							}
468
469
470							# false if we hit another regexp
471							# The arguments are:
472							# $elem - The PPI::Element whose siblings are to be checked;
473							# $arg - A hash reference containing the following keys:
474							# regexp => the relevant PPIx::Regexp object;
475							# numbered_captures => a reference to the array used to track the
476							# use of numbered captures;
477							# named_captures => a reference to the hash used to track the
478							# use of named captures;
479							# negated => true if the regexp was bound to its target with the
480							# '!~' operator;
481							# document => a reference to the Perl::Critic::Document;
482							# Converted to passing the arguments everyone gets in a hash because of
483							# the need to add the 'negated' argument, which would put us at six
484							# arguments.
485							sub _check_rest_of_statement {
486	236			236		460	my ( $elem, $arg ) = @_;
487
488	236					461	my $checker = _make_regexp_checker();
489	236					744	my $nsib = $elem->snext_sibling;
490
491							# If we are an if (or elsif) and the result of the regexp is
492							# negated, we skip the first block found. RT #69867
493	236	100	100			4925	if ( $arg->{negated} && _is_condition_of_if_statement( $elem ) ) {
494	1		33			33	while ( $nsib && ! $nsib->isa( 'PPI::Structure::Block' ) ) {
495	0					0	$nsib = $nsib->snext_sibling();
496							}
497	1	50				7	$nsib and $nsib = $nsib->snext_sibling();
498							}
499
500	236					648	while ($nsib) {
501	203	100				2401	return if $checker->($nsib);
502	198	100				734	if ($nsib->isa('PPI::Node')) {
503	88	100				255	return if ! _check_node_children($nsib, $arg, $checker );
504							} else {
505							_mark_magic( $nsib, $arg->{regexp}, $arg->{numbered_captures},
506	110					344	$arg->{named_captures}, $arg->{document} );
507							}
508	164					1152	$nsib = $nsib->snext_sibling;
509							}
510	197					2849	return $TRUE;
511							}
512
513							{
514
515							Readonly::Hash my %IS_IF_STATEMENT => hashify( qw{ if elsif } );
516
517							# Return true if the argument is the condition of an if or elsif
518							# statement, otherwise return false.
519							sub _is_condition_of_if_statement {
520	11			11		19	my ( $elem ) = @_;
521	11	100	66			90	$elem
522							and $elem->isa( 'PPI::Structure::Condition' )
523							or return $FALSE;
524	1	50				5	my $psib = $elem->sprevious_sibling()
525							or return $FALSE;
526	1	50				31	$psib->isa( 'PPI::Token::Word' )
527							or return $FALSE;
528	1					4	return $IS_IF_STATEMENT{ $psib->content() };
529
530							}
531							}
532
533							# false if we hit another regexp
534							# The arguments are:
535							# $elem - The PPI::Node whose children are to be checked;
536							# $arg - A hash reference containing the following keys:
537							# regexp => the relevant PPIx::Regexp object;
538							# numbered_captures => a reference to the array used to track the
539							# use of numbered captures;
540							# named_captures => a reference to the hash used to track the
541							# use of named captures;
542							# document => a reference to the Perl::Critic::Document;
543							# $parent_checker - The parent's regexp checking code snippet,
544							# manufactured by _make_regexp_checker(). This argument is not in
545							# the $arg hash because that hash is shared among levels of the
546							# parse tree, whereas the regexp checker is not.
547							# TODO the things in the $arg hash are widely shared among the various
548							# pieces/parts of this policy; maybe more subroutines should use this
549							# hash rather than passing all this stuff around as individual
550							# arguments. This particular subroutine got the hash-reference treatment
551							# because Subroutines::ProhibitManyArgs started complaining when the
552							# checker argument was added.
553							sub _check_node_children {
554	338			338		599	my ($elem, $arg, $parent_checker) = @_;
555
556							# caveat: this will descend into subroutine definitions...
557
558	338					545	my $checker = _make_regexp_checker($parent_checker);
559	338					1049	for my $child ($elem->schildren) {
560	616	100				5023	return if $checker->($child);
561	582	100				1831	if ($child->isa('PPI::Node')) {
562	228	100				478	return if ! _check_node_children($child, $arg, $checker);
563							} else {
564							_mark_magic($child, $arg->{regexp},
565							$arg->{numbered_captures}, $arg->{named_captures},
566	354					832	$arg->{document});
567							}
568							}
569	255					1960	return $TRUE;
570							}
571
572							sub _mark_magic {
573	464			464		827	my ($elem, $re, $captures, $named_captures, $doc) = @_;
574
575							# If we're a double-quotish element, we need to grub through its
576							# content. RT #38942
577	464	100				792	if ( _is_double_quotish_element( $elem ) ) {
578	14					50	_mark_magic_in_content(
579							$elem->content(), $re, $captures, $named_captures, $doc );
580	14					38	return;
581							}
582
583							# Ditto a here document, though the logic is different. RT #38942
584	450	100				1436	if ( $elem->isa( 'PPI::Token::HereDoc' ) ) {
585	3	50				10	$elem->content() =~ m/ \A << ~? \s* ' /sxm
586							or _mark_magic_in_content(
587							join( $EMPTY, $elem->heredoc() ), $re, $captures,
588							$named_captures, $doc );
589	3					8	return;
590							}
591
592							# Only interested in magic, or known English equivalent.
593	447					923	my $content = $elem->content();
594	447	100				1877	my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
595							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
596							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
597							$elem->isa( 'PPI::Token::Magic' )
598	447	100	100			2524	or $capture_ref->{$content}
599							or return;
600
601	81	100				593	if ( $content =~ m/ \A \$ ( \d+ ) /xms ) {
		100
		100
602
603							# Record if we see $1, $2, $3, ...
604	43					149	my $num = $1;
605	43	100				139	if (0 < $num) { # don't mark $0
606							# Only mark the captures we really need -- don't mark superfluous magic vars
607	42	100				68	if ($num <= @{$captures}) {
	42					112
608	41					116	_record_numbered_capture( $num, $captures );
609							}
610							}
611							} elsif ( $capture_array->{$content} ) { # GitHub #778
612	1					10	foreach my $num ( 1 .. @{$captures} ) {
	1					5
613	1					6	_record_numbered_capture( $num, $captures );
614							}
615							} elsif ( $capture_ref->{$content} ) {
616	30					443	_mark_magic_subscripted_code( $elem, $re, $captures, $named_captures );
617							}
618	81					277	return;
619							}
620
621							# Record a named capture referenced by a hash or array found in code.
622							# The arguments are:
623							# $elem - The element that represents a subscripted capture variable;
624							# $re - The PPIx::Regexp object;
625							# $captures - A reference to the numbered capture array;
626							# $named_captures - A reference to the named capture hash.
627							sub _mark_magic_subscripted_code {
628	30			30		79	my ( $elem, $re, $captures, $named_captures ) = @_;
629	30	50				108	my $subscr = $elem->snext_sibling() or return;
630	30	50				894	$subscr->isa( 'PPI::Structure::Subscript' ) or return;
631	30					122	my $subval = $subscr->content();
632	30					1144	_record_subscripted_capture(
633							$elem->content(), $subval, $re, $captures, $named_captures );
634	30					75	return;
635							}
636
637							# Find capture variables in the content of a double-quotish thing, and
638							# record their use. RT #38942. The arguments are:
639							# $content - The content() ( or heredoc() in the case of a here
640							# document) to be analyzed;
641							# $re - The PPIx::Regexp object;
642							# $captures - A reference to the numbered capture array;
643							# $named_captures - A reference to the named capture hash.
644							sub _mark_magic_in_content {
645	17			17		135	my ( $content, $re, $captures, $named_captures, $doc ) = @_;
646
647	17	100				61	my ( $capture_ref, $capture_array ) = $doc->uses_module( 'English' ) ?
648							( \%CAPTURE_REFERENCE_ENGLISH, \%CAPTURE_ARRAY_ENGLISH ) :
649							( \%CAPTURE_REFERENCE, \%CAPTURE_ARRAY );
650
651	17					118	while ( $content =~ m< ( [\$\@] (?:
652							[{] \^? (?: \w+ \| . ) [}] \| \w+ \| . ) ) >sxmg ) {
653	10					30	my $name = $1;
654	10	100				42	$name =~ s/ \A ( [\$\@] ) [{] (?! \^ ) /$1/sxm
655							and $name =~ s/ [}] \z //sxm;
656
657	10	100	33			55	if ( $name =~ m/ \A \$ ( \d+ ) \z /sxm ) {
		100
		50
658
659	5					11	my $num = $1;
660							0 < $num
661	5	50	33			18	and $num <= @{ $captures }
	5					27
662							and _record_numbered_capture( $num, $captures );
663
664							} elsif ( $capture_array->{$name} ) { # GitHub #778
665	1					8	foreach my $num ( 1 .. @{$captures} ) {
	1					6
666	1					4	_record_numbered_capture( $num, $captures );
667							}
668
669							} elsif ( $capture_ref->{$name} &&
670							$content =~ m/ \G ( [{] [^}]+ [}] \| [[] [^]] []] ) /smxgc )
671							{
672	4					90	_record_subscripted_capture(
673							$name, $1, $re, $captures, $named_captures );
674
675							}
676							}
677	17					47	return;
678							}
679
680							# Return true if the given element is double-quotish. Always returns
681							# false for a PPI::Token::HereDoc, since they're a different beast.
682							# RT #38942.
683							sub _is_double_quotish_element {
684	464			464		703	my ( $elem ) = @_;
685
686	464	50				1117	$elem or return;
687
688	464					1034	my $content = $elem->content();
689
690	464	50				2307	if ( $elem->isa( 'PPI::Token::QuoteLike::Command' ) ) {
691	0					0	return $content !~ m/ \A qx \s* ' /sxm;
692							}
693
694	464					822	foreach my $class ( qw{
695							PPI::Token::Quote::Double
696							PPI::Token::Quote::Interpolate
697							PPI::Token::QuoteLike::Backtick
698							PPI::Token::QuoteLike::Readline
699							} ) {
700	1814	100				4685	$elem->isa( $class ) and return $TRUE;
701							}
702
703	450					930	return $FALSE;
704							}
705
706							# Record a subscripted capture, either hash dereference or array
707							# dereference. We assume that an array represents a numbered capture and
708							# a hash represents a named capture, since we have to handle (e.g.) both
709							# @+ and %+.
710							sub _record_subscripted_capture {
711	34			34		290	my ( $variable_name, $suffix, $re, $captures, $named_captures ) = @_;
712	34	100				261	if ( $suffix =~ m/ \A [{] ( .*? ) [}] /smx ) {
		50
713	8					44	( my $name = $1 ) =~ s/ \A ( ["'] ) ( .*? ) \1 \z /$2/smx;
714	8					31	_record_named_capture( $name, $captures, $named_captures );
715							} elsif ( $suffix =~ m/ \A [[] \s* ( [-+]? \d+ ) \s* []] /smx ) {
716							# GitHub #778
717							# Mostly capture numbers encountered here are 1-based (e.g. @+, @-).
718							# But @{^CAPTURE} is zero-based, so we need to tweak the subscript
719							# before we record the capture number.
720	26					93	my $num = $1 + 0;
721							$num >= 0
722	26	100	100			130	and $ZERO_BASED_CAPTURE_REFERENCE{$variable_name}
723							and $num++;
724	26					193	_record_numbered_capture( $num, $captures, $re );
725							}
726	34					98	return;
727							}
728
729							# Because a named capture is also one or more numbered captures, the recording
730							# of the use of a named capture seemed complex enough to wrap in a subroutine.
731							sub _record_named_capture {
732	8			8		26	my ( $name, $captures, $named_captures ) = @_;
733	8	50				40	defined ( my $numbers = $named_captures->{$name} ) or return;
734	8					21	foreach my $capnum ( @{ $numbers } ) {
	8					26
735	10					31	_record_numbered_capture( $capnum, $captures );
736							}
737	8					28	$named_captures->{$name} = undef;
738	8					19	return;
739							}
740
741							sub _record_numbered_capture {
742	114			114		313	my ( $number, $captures, $re ) = @_;
743	114	100	100			370	$re and $number < 0
744							and $number = $re->max_capture_number() + $number + 1;
745	114	50				464	return if $number <= 0;
746	114					210	$captures->[ $number - 1 ] = 1;
747	114					315	return;
748							}
749
750							1;
751
752							__END__
753
754							#-----------------------------------------------------------------------------
755
756							=pod
757
758							=for stopwords refactored
759
760							=head1 NAME
761
762							Perl::Critic::Policy::RegularExpressions::ProhibitUnusedCapture - Only use a capturing group if you plan to use the captured value.
763
764
765							=head1 AFFILIATION
766
767							This Policy is part of the core L<Perl::Critic\|Perl::Critic>
768							distribution.
769
770
771							=head1 DESCRIPTION
772
773							Perl regular expressions have multiple types of grouping syntax. The
774							basic parentheses (e.g. C<m/(foo)/>) captures into the magic variable
775							C<$1>. Non-capturing groups (e.g. C<m/(?:foo)/> are useful because
776							they have better runtime performance and do not copy strings to the
777							magic global capture variables.
778
779							It's also easier on the maintenance programmer if you consistently use
780							capturing vs. non-capturing groups, because that programmer can tell
781							more easily which regexps can be refactored without breaking
782							surrounding code which may use the captured values.
783
784
785							=head1 CONFIGURATION
786
787							This Policy is not configurable except for the standard options.
788
789
790							=head1 CAVEATS
791
792							=head2 C<qr//> interpolation
793
794							This policy can be confused by interpolation of C<qr//> elements, but
795							those are always false negatives. For example:
796
797							my $foo_re = qr/(foo)/;
798							my ($foo) = m/$foo_re (bar)/x;
799
800							A human can tell that this should be a violation because there are two
801							captures but only the first capture is used, not the second. The
802							policy only notices that there is one capture in the regexp and
803							remains happy.
804
805							=head2 C<@->, C<@+>, C<$LAST_MATCH_START> and C<$LAST_MATCH_END>
806
807							This policy will only recognize capture groups referred to by these
808							variables if the use is subscripted by a literal integer.
809
810							=head2 C<$^N> and C<$LAST_SUBMATCH_RESULT>
811
812							This policy will not recognize capture groups referred to only by these
813							variables, because there is in general no way by static analysis to
814							determine which capture group is referred to. For example,
815
816							m/ (?: (A[[:alpha:]]+) \| (N\d+) ) (?{$foo=$^N}) /smx
817
818							makes use of the first capture group if it matches, or the second
819							capture group if the first does not match but the second does.
820
821							=head2 split()
822
823							Normally, this policy thinks that if a capture is used at all it must be
824							used before the next regular expression in the same scope. The regular
825							expression in a C<split()> needs to be exempted because it does not
826							affect the caller's capture variables.
827
828							At present, this policy recognizes and exempts the regular expressions
829							in
830
831							split /.../, ...
832
833							and
834
835							split( /.../, ... )
836
837							but more exotic syntax may produce false positives.
838
839
840							=head1 CREDITS
841
842							Initial development of this policy was supported by a grant from the
843							Perl Foundation.
844
845
846							=head1 AUTHOR
847
848							Chris Dolan <cdolan@cpan.org>
849
850
851							=head1 COPYRIGHT
852
853							Copyright (c) 2007-2021 Chris Dolan. Many rights reserved.
854
855							This program is free software; you can redistribute it and/or modify
856							it under the same terms as Perl itself. The full text of this license
857							can be found in the LICENSE file included with this module
858
859							=cut
860
861							# Local Variables:
862							# mode: cperl
863							# cperl-indent-level: 4
864							# fill-column: 78
865							# indent-tabs-mode: nil
866							# c-indentation-style: bsd
867							# End:
868							# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :