File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitEscapedMetacharacters.pm
Criterion Covered Total %
statement 21 32 65.6
branch 0 14 0.0
condition n/a
subroutine 10 11 90.9
pod 4 5 80.0
total 35 62 56.4


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::RegularExpressions::ProhibitEscapedMetacharacters;
2              
3 40     40   27832 use 5.010001;
  40         192  
4 40     40   280 use strict;
  40         129  
  40         862  
5 40     40   246 use warnings;
  40         119  
  40         1100  
6              
7 40     40   269 use Readonly;
  40         143  
  40         2202  
8              
9 40     40   346 use Perl::Critic::Utils qw( :severities hashify );
  40         141  
  40         2272  
10 40     40   5825 use parent 'Perl::Critic::Policy';
  40         142  
  40         265  
11              
12             our $VERSION = '1.150';
13              
14             #-----------------------------------------------------------------------------
15              
16             Readonly::Scalar my $DESC => q{Use character classes for literal metachars instead of escapes};
17             Readonly::Scalar my $EXPL => [247];
18              
19             Readonly::Hash my %REGEXP_METACHARS => hashify(split / /xms, '{ } ( ) . * + ? |');
20              
21             #-----------------------------------------------------------------------------
22              
23 89     89 0 1674 sub supported_parameters { return qw() }
24 74     74 1 352 sub default_severity { return $SEVERITY_LOWEST }
25 84     84 1 419 sub default_themes { return qw( core pbp cosmetic ) }
26 30     30 1 127 sub applies_to { return qw(PPI::Token::Regexp::Match
27             PPI::Token::Regexp::Substitute
28             PPI::Token::QuoteLike::Regexp) }
29              
30             #-----------------------------------------------------------------------------
31              
32             sub violates {
33 0     0 1   my ( $self, $elem, $document ) = @_;
34              
35             # optimization: don't bother parsing the regexp if there are no escapes
36 0 0         return if $elem !~ m/\\/xms;
37              
38 0 0         my $re = $document->ppix_regexp_from_element( $elem ) or return;
39 0 0         $re->failures() and return;
40 0 0         my $qr = $re->regular_expression() or return;
41              
42 0 0         my $exacts = $qr->find( 'PPIx::Regexp::Token::Literal' ) or return;
43 0           foreach my $exact( @{ $exacts } ) {
  0            
44 0 0         $exact->content() =~ m/ \\ ( . ) /xms or next;
45 0 0         return $self->violation( $DESC, $EXPL, $elem ) if $REGEXP_METACHARS{$1};
46             }
47              
48 0           return; # OK
49             }
50              
51             1;
52              
53             __END__
54              
55             #-----------------------------------------------------------------------------
56              
57             =pod
58              
59             =for stopwords IPv4
60              
61             =head1 NAME
62              
63             Perl::Critic::Policy::RegularExpressions::ProhibitEscapedMetacharacters - Use character classes for literal meta-characters instead of escapes.
64              
65              
66             =head1 AFFILIATION
67              
68             This Policy is part of the core L<Perl::Critic|Perl::Critic>
69             distribution.
70              
71              
72             =head1 DESCRIPTION
73              
74             Ever heard of leaning toothpick syndrome? That comes from writing
75             regular expressions that match on characters that are significant in
76             regular expressions. For example, the expression to match four
77             forward slashes looks like:
78              
79             m/\/\/\/\//;
80              
81             Well, this policy doesn't solve that problem (write it as C<m{////}>
82             instead!) but solves a related one. As seen above, the escapes make
83             the expression hard to parse visually. One solution is to use
84             character classes. You see, inside of character classes, the only
85             characters that are special are C<\>, C<]>, C<^> and C<->, so you
86             don't need to escape the others. So instead of the following loose
87             IPv4 address matcher:
88              
89             m/ \d+ \. \d+ \. \d+ \. \d+ /x;
90              
91             You could write:
92              
93             m/ \d+ [.] \d+ [.] \d+ [.] \d+ /x;
94              
95             which is certainly more readable, if less recognizable prior to the
96             publication of Perl Best Practices. (Of course, you should really use
97             L<Regexp::Common::net|Regexp::Common::net> to match IPv4 addresses!)
98              
99             Specifically, this policy forbids backslashes immediately prior to the
100             following characters:
101              
102             { } ( ) . * + ? |
103              
104             We make a special exception for C<$> because C</[$]/> turns into
105             C</[5.008006/> for Perl 5.8.6. We also make an exception for C<^>
106             because it has special meaning (negation) in a character class.
107             Finally, C<[> and C<]> are exempt, of course, because they are awkward
108             to represent in character classes.
109              
110             Note that this policy does not forbid unnecessary escaping. So go
111             ahead and (pointlessly) escape C<!> characters.
112              
113              
114             =head1 CONFIGURATION
115              
116             This Policy is not configurable except for the standard options.
117              
118              
119             =head1 BUGS
120              
121             Perl treats C<m/[#]/x> in unexpected ways.
122             I think it's a bug in Perl itself, but am not 100% sure that I have
123             not simply misunderstood...
124              
125             This part makes sense:
126              
127             "#f" =~ m/[#]f/x; # match
128             "#f" =~ m/[#]a/x; # no match
129              
130             This doesn't:
131              
132             $qr = qr/f/;
133             "#f" =~ m/[#]$qr/x; # no match
134              
135             Neither does this:
136              
137             print qr/[#]$qr/x; # yields '(?x-ism:[#]$qr
138             )'
139              
140             =head1 CREDITS
141              
142             Initial development of this policy was supported by a grant from the
143             Perl Foundation.
144              
145              
146             =head1 AUTHOR
147              
148             Chris Dolan <cdolan@cpan.org>
149              
150              
151             =head1 COPYRIGHT
152              
153             Copyright (c) 2007-2023 Chris Dolan
154              
155             This program is free software; you can redistribute it and/or modify
156             it under the same terms as Perl itself. The full text of this license
157             can be found in the LICENSE file included with this module
158              
159             =cut
160              
161             # Local Variables:
162             # mode: cperl
163             # cperl-indent-level: 4
164             # fill-column: 78
165             # indent-tabs-mode: nil
166             # c-indentation-style: bsd
167             # End:
168             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :