File Coverage

blib/lib/Perl/Critic/Policy/InputOutput/RequireEncodingWithUTF8Layer.pm
Criterion Covered Total %
statement 42 43 97.6
branch 13 16 81.2
condition 1 3 33.3
subroutine 15 15 100.0
pod 4 5 80.0
total 75 82 91.4


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::InputOutput::RequireEncodingWithUTF8Layer;
2              
3 40     40   27396 use 5.010001;
  40         193  
4 40     40   273 use strict;
  40         115  
  40         819  
5 40     40   207 use warnings;
  40         107  
  40         963  
6              
7 40     40   218 use Readonly;
  40         94  
  40         1826  
8              
9 40     40   276 use version;
  40         138  
  40         303  
10              
11 40     40   3383 use Perl::Critic::Utils qw{ :severities :ppi };
  40         140  
  40         2308  
12 40     40   6860 use parent 'Perl::Critic::Policy';
  40         124  
  40         260  
13              
14             our $VERSION = '1.146';
15              
16             #-----------------------------------------------------------------------------
17              
18             Readonly::Scalar my $DESC => q{I/O layer ":utf8" used};
19             Readonly::Scalar my $EXPL => q{Use ":encoding(UTF-8)" to get strict validation};
20              
21             Readonly::Scalar my $THREE_ARGUMENT_OPEN => 3;
22             Readonly::Hash my %RECOVER_ENCODING => (
23             binmode => \&_recover_binmode_encoding,
24             open => \&_recover_open_encoding,
25             );
26              
27             #-----------------------------------------------------------------------------
28              
29 91     91 0 1690 sub supported_parameters { return () }
30 107     107 1 459 sub default_severity { return $SEVERITY_HIGHEST }
31 74     74 1 315 sub default_themes { return qw(core bugs security) }
32 38     38 1 141 sub applies_to { return 'PPI::Token::Word' }
33              
34             #-----------------------------------------------------------------------------
35              
36             sub violates {
37 532     532 1 1053 my ($self, $elem, $document) = @_;
38              
39 532 100       1186 my $handler = $RECOVER_ENCODING{ $elem->content() }
40             or return; # If we don't have a handler, we're not interested.
41 86 100       1112 my $encoding = $handler->( parse_arg_list( $elem ) )
42             or return; # If we can't recover an encoding, we give up.
43 53 100       412 return if $encoding !~ m/ (?: \A | : ) utf8 \b /smxi; # OK
44              
45 33         120 return $self->violation( $DESC, $EXPL, $elem );
46             }
47              
48             #-----------------------------------------------------------------------------
49              
50             # my $string = _get_argument_string( $arg[1] );
51             #
52             # This subroutine returns the string from the given argument (which must
53             # be a reference to an array of PPI objects), _PROVIDED_ the array
54             # contains a single PPI::Token::Quote object. Otherwise it simply
55             # returns, since we're too stupid to analyze anything else.
56              
57             sub _get_argument_string {
58 73     73   138 my ( $arg ) = @_;
59 73 100       215 ref $arg eq 'ARRAY' or return;
60 65 50 33     107 return if @{ $arg } == 0 || @{ $arg } > 1;
  65         172  
  65         185  
61 65 50       299 return $arg->[0]->string() if $arg->[0]->isa( 'PPI::Token::Quote' );
62 0         0 return;
63             }
64              
65             #-----------------------------------------------------------------------------
66              
67             # my $encoding = _recover_binmode_encoding( _parse_arg_list( $elem ) );
68             #
69             # This subroutine returns the encoding specified by the given $elem,
70             # which _MUST_ be the 'binmode' of a binmode() call.
71              
72             sub _recover_binmode_encoding {
73 32     32   76 my ( @args ) = @_;
74 32         74 return _get_argument_string( $args[1] );
75             }
76              
77             #-----------------------------------------------------------------------------
78              
79             # my $encoding = _recover_open_encoding( _parse_arg_list( $elem ) );
80             #
81             # This subroutine returns the encoding specified by the given $elem,
82             # which _MUST_ be the 'open' of a open() call.
83              
84             sub _recover_open_encoding {
85 54     54   117 my ( @args ) = @_;
86 54 100       177 @args < $THREE_ARGUMENT_OPEN
87             and return;
88 41 50       102 defined( my $string = _get_argument_string( $args[1] ) )
89             or return;
90 41         443 $string =~ s/ [+]? (?: < | >{1,2} ) //smx;
91 41         183 return $string;
92             }
93              
94             1;
95              
96             __END__
97              
98             #-----------------------------------------------------------------------------
99              
100             =pod
101              
102             =for stopwords PerlIO PerlMonks Wiki
103              
104             =head1 NAME
105              
106             Perl::Critic::Policy::InputOutput::RequireEncodingWithUTF8Layer - Write C<< open $fh, q{<:encoding(UTF-8)}, $filename; >> instead of C<< open $fh, q{<:utf8}, $filename; >>.
107              
108             =head1 AFFILIATION
109              
110             This Policy is part of the core L<Perl::Critic|Perl::Critic>
111             distribution.
112              
113              
114             =head1 DESCRIPTION
115              
116             Use of the C<:utf8> I/O layer (as opposed to C<:encoding(UTF8)> or
117             C<:encoding(UTF-8)>) was suggested in the Perl documentation up to
118             version 5.8.8. This may be OK for output, but on input C<:utf8> does not
119             validate the input, leading to unexpected results.
120              
121             An exploit based on this behavior of C<:utf8> is exhibited on PerlMonks
122             at L<http://www.perlmonks.org/?node_id=644786>. The exploit involves a
123             string read from an external file and sanitized with C<m/^(\w+)$/>,
124             where C<$1> nonetheless ends up containing shell meta-characters.
125              
126             To summarize:
127              
128             open $fh, '<:utf8', 'foo.txt'; # BAD
129             open $fh, '<:encoding(UTF8)', 'foo.txt'; # GOOD
130             open $fh, '<:encoding(UTF-8)', 'foo.txt'; # BETTER
131              
132             See the L<Encode|Encode> documentation for the difference between
133             C<UTF8> and C<UTF-8>. The short version is that C<UTF-8> implements the
134             Unicode standard, and C<UTF8> is liberalized.
135              
136             For consistency's sake, this policy checks files opened for output as
137             well as input. For complete coverage it also checks C<binmode()> calls,
138             where the direction of operation can not be determined.
139              
140              
141             =head1 CONFIGURATION
142              
143             This Policy is not configurable except for the standard options.
144              
145              
146             =head1 NOTES
147              
148             Because C<Perl::Critic> does a static analysis, this policy can not
149             detect cases like
150              
151             my $encoding = ':utf8';
152             binmode $fh, $encoding;
153              
154             where the encoding is computed.
155              
156              
157             =head1 SEE ALSO
158              
159             L<PerlIO|PerlIO>
160              
161             L<Encode|Encode>
162              
163             C<perldoc -f binmode>
164              
165             L<http://www.socialtext.net/perl5/index.cgi?the_utf8_perlio_layer>
166              
167             L<http://www.perlmonks.org/?node_id=644786>
168              
169             =head1 AUTHOR
170              
171             Thomas R. Wyant, III F<wyant at cpan dot org>
172              
173             =head1 COPYRIGHT
174              
175             Copyright (c) 2010-2011 Thomas R. Wyant, III
176              
177             This program is free software; you can redistribute it and/or modify
178             it under the same terms as Perl itself.
179              
180             =cut
181              
182             # Local Variables:
183             # mode: cperl
184             # cperl-indent-level: 4
185             # fill-column: 78
186             # indent-tabs-mode: nil
187             # c-indentation-style: bsd
188             # End:
189             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :