File Coverage

blib/lib/Perl/Critic/Policy/InputOutput/RequireEncodingWithUTF8Layer.pm
Criterion Covered Total %
statement 23 40 57.5
branch 1 16 6.2
condition 0 3 0.0
subroutine 11 14 78.5
pod 4 5 80.0
total 39 78 50.0


line stmt bran cond sub pod time code
1             package Perl::Critic::Policy::InputOutput::RequireEncodingWithUTF8Layer;
2              
3 40     40   27110 use 5.010001;
  40         177  
4 40     40   276 use strict;
  40         107  
  40         914  
5 40     40   226 use warnings;
  40         114  
  40         1020  
6              
7 40     40   278 use Readonly;
  40         111  
  40         2008  
8              
9 40     40   304 use Perl::Critic::Utils qw{ :severities :ppi };
  40         448  
  40         2131  
10 40     40   6434 use parent 'Perl::Critic::Policy';
  40         141  
  40         308  
11              
12             our $VERSION = '1.150';
13              
14             #-----------------------------------------------------------------------------
15              
16             Readonly::Scalar my $DESC => q{I/O layer ":utf8" used};
17             Readonly::Scalar my $EXPL => q{Use ":encoding(UTF-8)" to get strict validation};
18              
19             Readonly::Scalar my $THREE_ARGUMENT_OPEN => 3;
20             Readonly::Hash my %RECOVER_ENCODING => (
21             binmode => \&_recover_binmode_encoding,
22             open => \&_recover_open_encoding,
23             );
24              
25             #-----------------------------------------------------------------------------
26              
27 89     89 0 1610 sub supported_parameters { return () }
28 74     74 1 341 sub default_severity { return $SEVERITY_HIGHEST }
29 74     74 1 376 sub default_themes { return qw(core bugs security) }
30 36     36 1 121 sub applies_to { return 'PPI::Token::Word' }
31              
32             #-----------------------------------------------------------------------------
33              
34             sub violates {
35 358     358 1 668 my ($self, $elem, $document) = @_;
36              
37 358 50       605 my $handler = $RECOVER_ENCODING{ $elem->content() }
38             or return; # If we don't have a handler, we're not interested.
39 0 0         my $encoding = $handler->( parse_arg_list( $elem ) )
40             or return; # If we can't recover an encoding, we give up.
41 0 0         return if $encoding !~ m/ (?: \A | : ) utf8 \b /smxi; # OK
42              
43 0           return $self->violation( $DESC, $EXPL, $elem );
44             }
45              
46             #-----------------------------------------------------------------------------
47              
48             # my $string = _get_argument_string( $arg[1] );
49             #
50             # This subroutine returns the string from the given argument (which must
51             # be a reference to an array of PPI objects), _PROVIDED_ the array
52             # contains a single PPI::Token::Quote object. Otherwise it simply
53             # returns, since we're too stupid to analyze anything else.
54              
55             sub _get_argument_string {
56 0     0     my ( $arg ) = @_;
57 0 0         ref $arg eq 'ARRAY' or return;
58 0 0 0       return if @{ $arg } == 0 || @{ $arg } > 1;
  0            
  0            
59 0 0         return $arg->[0]->string() if $arg->[0]->isa( 'PPI::Token::Quote' );
60 0           return;
61             }
62              
63             #-----------------------------------------------------------------------------
64              
65             # my $encoding = _recover_binmode_encoding( _parse_arg_list( $elem ) );
66             #
67             # This subroutine returns the encoding specified by the given $elem,
68             # which _MUST_ be the 'binmode' of a binmode() call.
69              
70             sub _recover_binmode_encoding {
71 0     0     my ( @args ) = @_;
72 0           return _get_argument_string( $args[1] );
73             }
74              
75             #-----------------------------------------------------------------------------
76              
77             # my $encoding = _recover_open_encoding( _parse_arg_list( $elem ) );
78             #
79             # This subroutine returns the encoding specified by the given $elem,
80             # which _MUST_ be the 'open' of an open() call.
81              
82             sub _recover_open_encoding {
83 0     0     my ( @args ) = @_;
84 0 0         @args < $THREE_ARGUMENT_OPEN
85             and return;
86 0 0         defined( my $string = _get_argument_string( $args[1] ) )
87             or return;
88 0           $string =~ s/ [+]? (?: < | >{1,2} ) //smx;
89 0           return $string;
90             }
91              
92             1;
93              
94             __END__
95              
96             #-----------------------------------------------------------------------------
97              
98             =pod
99              
100             =for stopwords PerlIO PerlMonks Wiki
101              
102             =head1 NAME
103              
104             Perl::Critic::Policy::InputOutput::RequireEncodingWithUTF8Layer - Write C<< open $fh, q{<:encoding(UTF-8)}, $filename; >> instead of C<< open $fh, q{<:utf8}, $filename; >>.
105              
106             =head1 AFFILIATION
107              
108             This Policy is part of the core L<Perl::Critic|Perl::Critic>
109             distribution.
110              
111              
112             =head1 DESCRIPTION
113              
114             Use of the C<:utf8> I/O layer (as opposed to C<:encoding(UTF8)> or
115             C<:encoding(UTF-8)>) was suggested in the Perl documentation up to
116             version 5.8.8. This may be OK for output, but on input C<:utf8> does not
117             validate the input, leading to unexpected results.
118              
119             An exploit based on this behavior of C<:utf8> is exhibited on PerlMonks
120             at L<http://www.perlmonks.org/?node_id=644786>. The exploit involves a
121             string read from an external file and sanitized with C<m/^(\w+)$/>,
122             where C<$1> nonetheless ends up containing shell meta-characters.
123              
124             To summarize:
125              
126             open $fh, '<:utf8', 'foo.txt'; # BAD
127             open $fh, '<:encoding(UTF8)', 'foo.txt'; # GOOD
128             open $fh, '<:encoding(UTF-8)', 'foo.txt'; # BETTER
129              
130             See the L<Encode|Encode> documentation for the difference between
131             C<UTF8> and C<UTF-8>. The short version is that C<UTF-8> implements the
132             Unicode standard, and C<UTF8> is liberalized.
133              
134             For consistency's sake, this policy checks files opened for output as
135             well as input. For complete coverage it also checks C<binmode()> calls,
136             where the direction of operation can not be determined.
137              
138              
139             =head1 CONFIGURATION
140              
141             This Policy is not configurable except for the standard options.
142              
143              
144             =head1 NOTES
145              
146             Because C<Perl::Critic> does a static analysis, this policy can not
147             detect cases like
148              
149             my $encoding = ':utf8';
150             binmode $fh, $encoding;
151              
152             where the encoding is computed.
153              
154              
155             =head1 SEE ALSO
156              
157             L<PerlIO|PerlIO>
158              
159             L<Encode|Encode>
160              
161             C<perldoc -f binmode>
162              
163             L<http://www.socialtext.net/perl5/index.cgi?the_utf8_perlio_layer>
164              
165             L<http://www.perlmonks.org/?node_id=644786>
166              
167             =head1 AUTHOR
168              
169             Thomas R. Wyant, III F<wyant at cpan dot org>
170              
171             =head1 COPYRIGHT
172              
173             Copyright (c) 2010-2011 Thomas R. Wyant, III
174              
175             This program is free software; you can redistribute it and/or modify
176             it under the same terms as Perl itself.
177              
178             =cut
179              
180             # Local Variables:
181             # mode: cperl
182             # cperl-indent-level: 4
183             # fill-column: 78
184             # indent-tabs-mode: nil
185             # c-indentation-style: bsd
186             # End:
187             # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab shiftround :