File Coverage

blib/lib/Perl/Critic/Policy/RegularExpressions/ProhibitEscapedMetacharacters.pm
Criterion Covered Total %
statement 35 35 100.0
branch 10 14 71.4
condition n/a
subroutine 12 12 100.0
pod 4 5 80.0
total 61 66 92.4


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::RegularExpressions::ProhibitEscapedMetacharacters;
2              
3 40     40   27603 use 5.010001;
  40         190  
4 40     40   326 use strict;
  40         108  
  40         884  
5 40     40   237 use warnings;
  40         125  
  40         1266  
6              
7 40     40   345 use English qw(-no_match_vars);
  40         143  
  40         326  
8 40     40   15267 use Readonly;
  40         138  
  40         2235  
9              
10 40     40   301 use Perl::Critic::Utils qw{ :booleans :severities hashify };
  40         135  
  40         2065  
11 40     40   7394 use parent 'Perl::Critic::Policy';
  40         130  
  40         263  
12              
13             our $VERSION = '1.146';
14              
15             #-----------------------------------------------------------------------------
16              
17             Readonly::Scalar my $DESC => q{Use character classes for literal metachars instead of escapes};
18             Readonly::Scalar my $EXPL => [247];
19              
20             Readonly::Hash my %REGEXP_METACHARS => hashify(split / /xms, '{ } ( ) . * + ? |');
21              
22             #-----------------------------------------------------------------------------
23              
24 95     95 0 1631 sub supported_parameters { return qw() }
25 78     78 1 410 sub default_severity { return $SEVERITY_LOWEST }
26 84     84 1 376 sub default_themes { return qw( core pbp cosmetic ) }
27 36     36 1 150 sub applies_to { return qw(PPI::Token::Regexp::Match
28             PPI::Token::Regexp::Substitute
29             PPI::Token::QuoteLike::Regexp) }
30              
31             #-----------------------------------------------------------------------------
32              
33             sub violates {
34 16     16 1 42 my ( $self, $elem, $document ) = @_;
35              
36             # optimization: don't bother parsing the regexp if there are no escapes
37 16 100       59 return if $elem !~ m/\\/xms;
38              
39 15 50       132 my $re = $document->ppix_regexp_from_element( $elem ) or return;
40 15 50       126845 $re->failures() and return;
41 15 50       117 my $qr = $re->regular_expression() or return;
42              
43 15 50       319 my $exacts = $qr->find( 'PPIx::Regexp::Token::Literal' ) or return;
44 15         4817 foreach my $exact( @{ $exacts } ) {
  15         39  
45 86 100       467 $exact->content() =~ m/ \\ ( . ) /xms or next;
46 45 100       428 return $self->violation( $DESC, $EXPL, $elem ) if $REGEXP_METACHARS{$1};
47             }
48              
49 11         102 return; # OK
50             }
51              
52             1;
53              
54             __END__
55              
56             #-----------------------------------------------------------------------------
57              
58             =pod
59              
60             =for stopwords IPv4
61              
62             =head1 NAME
63              
64             Perl::Critic::Policy::RegularExpressions::ProhibitEscapedMetacharacters - Use character classes for literal meta-characters instead of escapes.
65              
66              
67             =head1 AFFILIATION
68              
69             This Policy is part of the core L<Perl::Critic|Perl::Critic>
70             distribution.
71              
72              
73             =head1 DESCRIPTION
74              
75             Ever heard of leaning toothpick syndrome? That comes from writing
76             regular expressions that match on characters that are significant in
77             regular expressions. For example, the expression to match four
78             forward slashes looks like:
79              
80             m/\/\/\/\//;
81              
82             Well, this policy doesn't solve that problem (write it as C<m{////}>
83             instead!) but solves a related one. As seen above, the escapes make
84             the expression hard to parse visually. One solution is to use
85             character classes. You see, inside of character classes, the only
86             characters that are special are C<\>, C<]>, C<^> and C<->, so you
87             don't need to escape the others. So instead of the following loose
88             IPv4 address matcher:
89              
90             m/ \d+ \. \d+ \. \d+ \. \d+ /x;
91              
92             You could write:
93              
94             m/ \d+ [.] \d+ [.] \d+ [.] \d+ /x;
95              
96             which is certainly more readable, if less recognizable prior the
97             publication of Perl Best Practices. (Of course, you should really use
98             L<Regexp::Common::net|Regexp::Common::net> to match IPv4 addresses!)
99              
100             Specifically, this policy forbids backslashes immediately prior to the
101             following characters:
102              
103             { } ( ) . * + ? | #
104              
105             We make special exception for C<$> because C</[$]/> turns into
106             C</[5.008006/> for Perl 5.8.6. We also make an exception for C<^>
107             because it has special meaning (negation) in a character class.
108             Finally, C<[> and C<]> are exempt, of course, because they are awkward
109             to represent in character classes.
110              
111             Note that this policy does not forbid unnecessary escaping. So go
112             ahead and (pointlessly) escape C<!> characters.
113              
114              
115             =head1 CONFIGURATION
116              
117             This Policy is not configurable except for the standard options.
118              
119              
120             =head1 BUGS
121              
122             Perl treats C<m/[#]/x> in unexpected ways.
123             I think it's a bug in Perl itself, but am not 100% sure that I have
124             not simply misunderstood...
125              
126             This part makes sense:
127              
128             "#f" =~ m/[#]f/x; # match
129             "#f" =~ m/[#]a/x; # no match
130              
131             This doesn't:
132              
133             $qr = qr/f/;
134             "#f" =~ m/[#]$qr/x; # no match
135              
136             Neither does this:
137              
138             print qr/[#]$qr/x; # yields '(?x-ism:[#]$qr
139             )'
140              
141             =head1 CREDITS
142              
143             Initial development of this policy was supported by a grant from the
144             Perl Foundation.
145              
146              
147             =head1 AUTHOR
148              
149             Chris Dolan <cdolan@cpan.org>
150              
151              
152             =head1 COPYRIGHT
153              
154             Copyright (c) 2007-2011 Chris Dolan. Many rights reserved.
155              
156             This program is free software; you can redistribute it and/or modify
157             it under the same terms as Perl itself. The full text of this license
158             can be found in the LICENSE file included with this module
159              
160             =cut
161              
162             # Local Variables:
163             # mode: cperl
164             # cperl-indent-level: 4
165             # fill-column: 78
166             # indent-tabs-mode: nil
167             # c-indentation-style: bsd
168             # End:
169             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :