File Coverage

blib/lib/Perl/Critic/Policy/Plicease/ProhibitUnicodeDigitInRegexp.pm

Criterion	Covered	Total	%
statement	37	38	97.3
branch	8	8	100.0
condition			n/a
subroutine	12	13	92.3
pod	4	5	80.0
total	61	64	95.3

line	stmt	bran	sub	pod	time	code
1						package Perl::Critic::Policy::Plicease::ProhibitUnicodeDigitInRegexp;
2
3	3		3		1795	use strict;
	3				7
	3				73
4	3		3		13	use warnings;
	3				8
	3				56
5	3		3		71	use 5.008001;
	3				9
6	3		3		14	use Perl::Critic::Utils qw( $SEVERITY_LOW );
	3				6
	3				258
7	3		3		946	use PPIx::Regexp;
	3				228395
	3				94
8	3		3		20	use base qw( Perl::Critic::Policy );
	3				6
	3				284
9
10						# ABSTRACT: Prohibit non-ASCII \d in regular expressions
11						our $VERSION = '0.03'; # VERSION
12
13
14	3		3		20	use constant DESC => 'Using non-ASCII \d';
	3				7
	3				197
15	3				623	use constant EXPL => 'The character class \d matches non-ASCI unicode digits. ' .
16	3		3		37	'Use [0-9] or the /a modifier (Perl 5.14+) instead.';
	3				6
17
18	11		11	0	43663	sub supported_parameters { () }
19	8		8	1	77	sub default_severity { $SEVERITY_LOW }
20	0		0	1	0	sub default_themes { () }
21	11		11	1	52005	sub applies_to { return ('PPI::Token::Regexp::Match',
22						'PPI::Token::Regexp::Substitute',
23						'PPI::Token::QuoteLike::Regexp') }
24
25						sub violates
26						{
27	12		12	1	824	my($self, $elem) = @_;
28
29	12				63	my %mods = $elem->get_modifiers();
30
31						# if the whole expression uses /a then we are in the clear.
32	12	100			192	return if $mods{'a'};
33
34						# if the user has explicitly specified the /u modifier then
35						# we should assume that they want unicode digits. Done.
36	11	100			27	return if $mods{'u'};
37
38	10				34	my $re = PPIx::Regexp->new($elem->content);
39	10				19601	my $ccs = $re->find('PPIx::Regexp::Token::CharClass');
40	10	100			1491	return unless $ccs;
41	9				19	foreach my $cc (@$ccs)
42						{
43	9	100			24	next if $cc->content ne '\\d';
44	8				59	return $self->violation( DESC, EXPL, $elem );
45						}
46
47	1				10	return;
48						}
49
50						1;
51
52						__END__
53
54						=pod
55
56						=encoding UTF-8
57
58						=head1 NAME
59
60						Perl::Critic::Policy::Plicease::ProhibitUnicodeDigitInRegexp - Prohibit non-ASCII \d in regular expressions
61
62						=head1 VERSION
63
64						version 0.03
65
66						=head1 DESCRIPTION
67
68						The character class C<\d> in a regular expression matches all Unicode digit characters, which
69						might not be what you expect if you are testing if a string can be used as a number in Perl.
70						Instead use either C<[0-9]>, or if you are on Perl 5.14 or better you can use the C</a>
71						modifier. This policy allows C<\d> in expressions with an explicit C</u> modifier (normally
72						on by default), as it indicates that the code is expecting Unicode semantics, including Unicode
73						digits.
74
75						/\d/; # not ok
76						/\d/a; # ok
77						/\d/u; # ok
78						/[0-9]/; # ok
79
80						=head1 AFFILIATION
81
82						None.
83
84						=head1 CONFIGURATION
85
86						This policy is not configurable except for the standard options.
87
88						=head1 CAVEATS
89
90						This is not a general policy, and should not be applied toward all applications without
91						some thought. This is frequently true for L<Perl::Critic> policies, but especially so
92						for this policy.
93
94						In general, the ability to match against Unicode digits is useful, and doesn't
95						constitute bad code. Some applications don't ever need to match non-ASCII digit characters,
96						and incorrectly rely on C<\d> to validate as a number.
97
98						This policy doesn't take into account using the L<re> pragma.
99
100						use re '/a';
101
102						/\d/; # (still) not ok
103
104						=head1 AUTHOR
105
106						Graham Ollis <plicease@cpan.org>
107
108						=head1 COPYRIGHT AND LICENSE
109
110						This software is copyright (c) 2019 by Graham Ollis.
111
112						This is free software; you can redistribute it and/or modify it under
113						the same terms as the Perl 5 programming language system itself.
114
115						=cut