File Coverage

blib/lib/Perl/Critic/Policy/Plicease/ProhibitUnicodeDigitInRegexp.pm
Criterion Covered Total %
statement 37 38 97.3
branch 8 8 100.0
condition n/a
subroutine 12 13 92.3
pod 4 5 80.0
total 61 64 95.3


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::Plicease::ProhibitUnicodeDigitInRegexp;
2              
3 3     3   1795 use strict;
  3         7  
  3         73  
4 3     3   13 use warnings;
  3         8  
  3         56  
5 3     3   71 use 5.008001;
  3         9  
6 3     3   14 use Perl::Critic::Utils qw( $SEVERITY_LOW );
  3         6  
  3         258  
7 3     3   946 use PPIx::Regexp;
  3         228395  
  3         94  
8 3     3   20 use base qw( Perl::Critic::Policy );
  3         6  
  3         284  
9              
10             # ABSTRACT: Prohibit non-ASCII \d in regular expressions
11             our $VERSION = '0.03'; # VERSION
12              
13              
14 3     3   20 use constant DESC => 'Using non-ASCII \d';
  3         7  
  3         197  
15 3         623 use constant EXPL => 'The character class \d matches non-ASCI unicode digits. ' .
16 3     3   37 'Use [0-9] or the /a modifier (Perl 5.14+) instead.';
  3         6  
17              
18 11     11 0 43663 sub supported_parameters { () }
19 8     8 1 77 sub default_severity { $SEVERITY_LOW }
20 0     0 1 0 sub default_themes { () }
21 11     11 1 52005 sub applies_to { return ('PPI::Token::Regexp::Match',
22             'PPI::Token::Regexp::Substitute',
23             'PPI::Token::QuoteLike::Regexp') }
24              
25             sub violates
26             {
27 12     12 1 824 my($self, $elem) = @_;
28              
29 12         63 my %mods = $elem->get_modifiers();
30              
31             # if the whole expression uses /a then we are in the clear.
32 12 100       192 return if $mods{'a'};
33              
34             # if the user has explicitly specified the /u modifier then
35             # we should assume that they want unicode digits. Done.
36 11 100       27 return if $mods{'u'};
37              
38 10         34 my $re = PPIx::Regexp->new($elem->content);
39 10         19601 my $ccs = $re->find('PPIx::Regexp::Token::CharClass');
40 10 100       1491 return unless $ccs;
41 9         19 foreach my $cc (@$ccs)
42             {
43 9 100       24 next if $cc->content ne '\\d';
44 8         59 return $self->violation( DESC, EXPL, $elem );
45             }
46              
47 1         10 return;
48             }
49              
50             1;
51              
52             __END__
53              
54             =pod
55              
56             =encoding UTF-8
57              
58             =head1 NAME
59              
60             Perl::Critic::Policy::Plicease::ProhibitUnicodeDigitInRegexp - Prohibit non-ASCII \d in regular expressions
61              
62             =head1 VERSION
63              
64             version 0.03
65              
66             =head1 DESCRIPTION
67              
68             The character class C<\d> in a regular expression matches all unicode digit characters, which
69             might not be what you expect if you are testing if a string can be used as a number in Perl.
70             Instead use either C<[0-9]>, or if you are on Perl 5.14 or better you can use the C</a>
71             modifier. This policy allows C<\d> in expressions with an explicit C</u> modifier (normally
72             on by default), as it indicates that the code is expecting Unicode semantics, including Unicode
73             digits.
74              
75             /\d/; # not ok
76             /\d/a; # ok
77             /\d/u; # ok
78             /[0-9]/; # ok
79              
80             =head1 AFFILIATION
81              
82             None.
83              
84             =head1 CONFIGURATION
85              
86             This policy is not configurable except for the standard options.
87              
88             =head1 CAVEATS
89              
90             This is not a general policy, and should not be applied toward all applications without
91             some thought. This is frequently true for L<Perl::Critic> policies, but especially so
92             for this policy.
93              
94             In general, the ability to match against unicode digits is useful, and doesn't
95             constitute bad code. Some applications don't ever need to match non-ASCII digit characters,
96             and incorrectly rely on C<\d> to validate as a number.
97              
98             This policy doesn't take into account using the L<re> pragma.
99              
100             use re '/a';
101            
102             /\d/; # (still) not ok
103              
104             =head1 AUTHOR
105              
106             Graham Ollis <plicease@cpan.org>
107              
108             =head1 COPYRIGHT AND LICENSE
109              
110             This software is copyright (c) 2019 by Graham Ollis.
111              
112             This is free software; you can redistribute it and/or modify it under
113             the same terms as the Perl 5 programming language system itself.
114              
115             =cut