File Coverage

blib/lib/String/EscapeCage.pm
Criterion Covered Total %
statement 42 49 85.7
branch 5 14 35.7
condition n/a
subroutine 14 17 82.3
pod 4 10 40.0
total 65 90 72.2


line stmt bran cond sub pod time code
1             package String::EscapeCage;
2              
3 5     5   118226 use warnings;
  5         14  
  5         163  
4 5     5   25 use strict;
  5         9  
  5         243  
5              
6             our $VERSION = '0.02';
7 5     5   28 use base qw( Exporter );
  5         13  
  5         745  
8             our @EXPORT_OK = qw( cage uncage ); # escape added automatically
9             use overload
10 5         49 '""' => \&stringify,
11             '.' => \&concat,
12             '0+' => \&numify,
13 5     5   9131 bool => \&boolify;
  5         5402  
14              
15 5     5   402 use Carp;
  5         8  
  5         404  
16 5     5   3953 use Symbol qw( qualify_to_ref );
  5         4494  
  5         4628  
17 9     9 0 29 sub untaint($) { $_[0] =~ /(.*)/s; return $1; }
  9         50  
18             # This should be in a module, but Scalar::Util provides only "tainted",
19             # and Taint::Util and Taint::Runtime aren't in the standard distribution.
20              
21              
22              
23             # configuration constants:
24              
25             my $disable_cage = ''; # iff disable checking everywhere
26             my %dmz_callers = (); # disable checking when called from some packages
27             # TODO: make subs respect these, provide interface to set them
28              
29              
30              
31              
32             # implementation
33              
34             sub new
35             {
36 8     8 1 13 my $class = shift;
37 8         15 my $string = shift;
38             # TODO: error checking unless $disable_error_checking;
39 8         42 return bless \$string, $class;
40             }
41              
42             sub cage($)
43             {
44 8     8 1 38 my $value = shift; # a string
45 8         45 return __PACKAGE__->new($value);
46             }
47              
48             # TODO: Should we die/warn when the programmer attempts to uncage a
49             # normal string? I think so, because programmers really should keep track
50             # of what is caged and what is not. That is, they shouldn't just uncage
51             # everything in an attempt to get the data out. If a programmer wants to
52             # do that during rapid development, the solution is to temporarily import
53             # the uncageany sub under the name "uncage".
54              
55             sub uncage($)
56             {
57 4     4 1 941 my $self = shift;
58 4 50       25 croak "Not a caged string" unless UNIVERSAL::isa( $self, __PACKAGE__ );
59             # TODO: unless $disable_error_checking;
60 4         225 return untaint $$self; # assume user is competent, so untaint
61             }
62              
63             # I recommend against using uncageany: you should know what's caged and what's not
64             sub uncageany
65             {
66 0 0       0 return map {
67 0     0 0 0 untaint( UNIVERSAL::isa( $_, __PACKAGE__ ) ? $$_ : $_ )
68             } @_;
69             }
70              
71             sub stringify
72             {
73 2     2 0 3 my $self = shift;
74             # TODO: return $$self if caller_is_matching_string_against_regexp();
75 2 50       10 return $$self if $disable_cage; # don't untaint
76             # TODO: disable fatal errors according to program scope, caller scope
77             # TODO: warn only once per caller, object, creation point, value, etc
78 2         277 croak "Access of unescaped Caged string";
79             # TODO: report contents, where it was caged, etc
80 0         0 return $$self;
81             }
82              
83             sub concat
84             {
85 3     3 0 291 my $self = shift;
86 3         5 my $other = shift; # !ref string may get extra escaping
87 3         9 my $order = shift;
88 3 50       22 UNIVERSAL::isa($other,__PACKAGE__) and $other = $$other;
89 3 50       15 return cage( $order ? $other.$$self : $$self.$other );
90             }
91              
92             # when used as a number, we can ignore the danger, I think.
93             # anyway, the user really needs numeric access just to do bounds checking etc
94             sub numify
95             {
96 0     0 0 0 my $self = shift;
97 0         0 return $$self;
98             }
99              
100              
101             # when used as a boolean guard, we can ignore the danger
102             sub boolify
103             {
104 0     0 0 0 my $self = shift;
105 0         0 return $$self;
106             }
107              
108             # though I'd prefer to overload =~
109             sub re
110             {
111 2     2 1 1695 my $self = shift;
112 2         3 my $re = shift; # qr/regexp/
113 2         12 return $$self =~ /$re/;
114             }
115              
116              
117              
118              
119             # TODO: let extensions add elements
120             my %SCHEMES = ( # schemename => (transforming (xform)) escaping sub
121              
122             percent => sub {
123             my $string = shift;
124             $string =~ s/ [ =] / sprintf '%%%02X', ord($&) /xeg;
125             return $string;
126             },
127              
128             html => do {
129             my %ESCAPE_OF = (
130             '<' => '<',
131             '>' => '>',
132             '&' => '&',
133             "\n" => "
\n", # maybe
134             );
135             my $RE = eval 'qr/[' . join( '', keys(%ESCAPE_OF) ) . ']/';
136             # TODO: implement escaping properly
137             # TODO: better yet, use CGI::escapeHTML (and think about dependencies)
138             sub {
139             my $string = shift;
140             $string =~ s/$RE/$ESCAPE_OF{$&}/xg;
141             return $string;
142             }
143             },
144              
145             cstring => do { # or maybe use String::Escape
146             my %ESCAPE_OF = map { eval qq| "\\$_" | => "\\$_" }
147             qw( 0 a b t n f r \ " );
148             my $RE = eval 'qr/[' . join( '', keys(%ESCAPE_OF) ) . ']/';
149             sub {
150             my $string = shift;
151             $string =~ s/$RE/$ESCAPE_OF{$&}/xg;
152             return $string;
153             }
154             },
155              
156             # TODO: shell, sql, http header, cat -v
157             );
158              
159              
160             while( my($name,$xform) = each %SCHEMES ) {
161             my $subname = 'escape' . $name;
162             push @EXPORT_OK, $subname;
163             *{qualify_to_ref( $subname )} = sub($) {
164 5     5   615 my $self = shift;
165             # TODO: should pass remaining @_ params to xform sub?
166             # (would want to specify a different prototype)
167 5 0       23 my $string = UNIVERSAL::isa( $self, __PACKAGE__ ) ?
    50          
168             $$self :
169             !ref $self ?
170             $self : # TODO: think we should be a util for bare strings
171             croak "Not an EscapeCaged string";
172 5         15 return untaint $xform->( $string );
173             };
174             }
175              
176              
177              
178              
179             1;
180              
181              
182              
183              
184             =pod
185              
186             =head1 NAME
187              
188             String::EscapeCage - Cage and escape strings to prevent injection attacks
189              
190              
191             =head1 VERSION
192              
193             Version 0.02
194              
195              
196             =head1 SYNOPSIS
197              
198             The String::EscapeCage module puts dangerous strings in a cage. It eases
199             escaping to various encodings, helps developers track what data are
200             dangerous, and prevents injection attacks.
201              
202              
203             use String::EscapeCage qw( cage uncage escapehtml );
204              
205             my $name = cage $cgi->param('name');
206             print "Hello, ", $name, "\n"; # croaks to avoid HTML injection attack
207             print "Hello, ", escapehtml $name, "\n"; # nice and safe
208             print "Hello, ", uncage $name, "\n"; # remove protection
209              
210              
211              
212              
213             =head1 DESCRIPTION
214              
215             After the L> function cages a string, the L> method
216             releases it and L>, L>, etc methods safely
217             escape (transform) it. If an application cages all user-supplied strings,
218             then a run-time exception will prevent application code from accidentally
219             allowing an SQL, shell, cross-site scripting, cat -v, etc injection attack.
220             String::EscapeCage's paranoia can be adjusted for development. The concept is
221             similar to "tainted" data, but is implemented by "overload"ing the '""'
222             stringify method on blessed scalar references.
223              
224              
225             By default C does not export any subroutines.
226             The subroutines are (available for import and/or as methods):
227              
228              
229             =over 4
230              
231              
232             =item cage STRING / new STRING
233              
234             Return a new EscapeCage object holding the given string. C is
235             only available as an exported function; C is only available as a
236             class method.
237              
238              
239             =item uncage CAGE
240              
241             Returns the string that had been "caged" in the given EscapeCage object.
242             It will be untainted, since you presumably know what you're doing with it.
243             Available as an exported function or an object method.
244              
245              
246             =item re CAGE REGEXP
247              
248             Applies the REGEXP to the string that had been "caged", taking the place
249             of the regular expression binding operator C<=~>.
250              
251             I want to overload C<=~> and let an EscapeCage uncage and untaint
252             itself just as if it were a tainted strings, but L> doesn't
253             support C<=~>. So, this is an ugly work-around to get a little brevity
254             and to mark points for when we figure out overloading. Doesn't set the
255             (implicitly local()ized) numbered match variables (eg C<$1>) the way
256             you want.
257              
258              
259             =item escapecstring CAGE
260              
261             Returns the C-string-escaped transformation of the string that had been
262             "caged" in the given EscapeCage object. It will be untainted, since it
263             should be safe to print now. Available as an exported function or an
264             object method.
265              
266              
267             =item escapepercent CAGE
268              
269             Returns the URL percent-escaped transformation of the string that had been
270             "caged" in the given EscapeCage object. It will be untainted, since it
271             should be safe to print now. Available as an exported function or an
272             object method.
273              
274              
275             =back
276              
277              
278              
279              
280             =head1 ADDING STRING::ESCAPECAGE TO AN EXISTING PROJECT
281              
282             =over 4
283              
284             =item * Turn global paranoia off (not yet implemented); cage all incoming strings.
285              
286             =item * Over time, in each package, turn local paranoia on (not yet implemented); escape strings in the package's code and cage new strings.
287              
288             =item * When done, turn global paranoia back on.
289              
290             =item * Remove explicit local paranoia setting if desired.
291              
292             =back
293              
294              
295              
296              
297             =head1 CAVEATS
298              
299             =over 4
300              
301             =item * Different ref()/blessed() behavior
302              
303             =item * Doesn't protect against strings you build yourself; eg building
304             a URL string by manually decoding hex digits (May I suggest that the
305             decoding function should return a cage?).
306              
307             =back
308              
309              
310              
311              
312             =head1 COMPARISON WITH TAINT
313              
314             =over 4
315              
316             =item * Taint checking (for setuid etc) distrusts the invoking user;
317             String::EscapeCage focuses its distrust on explicitly marked data (usually input).
318              
319             =item * A tainted value may be print()ed or syswrite()d; an attempt to
320             print a caged value will croak.
321              
322             =item * Tainting lacks granularity; EscapeCages may be explicitly wrapped
323             around some data but not others.
324              
325             =item * A tainted value may be used as a method name or symbolic sub;
326             String::EscapeCage disallows this.
327              
328             =item * Taintedness can (essentially only) be removed via regular
329             expressions or hash keys; a String::EscapeCage can only be removed
330             with an explicit call to L>, L (regular expression)>,
331             L>, etc.
332              
333             =item * String::EscapeCage doesn't do the cleanup that the C<-T> taint flag
334             enables: C<@INC>, C<$ENV{PERL5LIB}> and C<$ENV{PERLLIB}>, C<$ENV{PATH}>,
335             any setuid/setgid issues.
336              
337             =back
338              
339              
340              
341              
342             =head1 BUGS
343              
344             =over 4
345              
346             =item * The interface was designed without input from a real project
347             and is subject to change.
348              
349             =item * You can't use a regular expression on a caged string
350              
351             =back
352              
353             Please report any bugs or feature requests to
354             C, or through the web interface at
355             L.
356             I will be notified, and then you'll automatically be notified of progress on
357             your bug as I make changes.
358              
359              
360              
361              
362             =head1 TODO
363              
364             =over 4
365              
366             =item * Define the interface. Until this is used in a real project,
367             it's tough to say what the optimal interface would be.
368              
369             =item * Provide different levels of strictness/fatality.
370              
371             =item * Provide levels of debugging. Notate cages with information for
372             humans: place where caged, reason, etc.
373              
374             =item * Give formally precise implementations of current escaping schemes:
375             percent, html, cstring.
376              
377             =item * Add other escaping schemes: shell, sql, http header, cat -v,
378             lots more.
379              
380             =item * Add a nice mechanism by which other modules can add other
381             escaping schemas.
382              
383             =item * Make wrappers of standard libraries that perform caging.
384             For example: A wrapper class for an IO::Handle object whose C
385             returns caged strings or whose C etc automatically htmlescapes
386             caged strings. A sub that changes all the values in an Apache::Request
387             object into caged values. Validation routines that "see through" cages.
388              
389             =item * Optimize. Maybe memoize escaped values, either by object
390             or by value. Maybe add the ability to turn off error checking.
391             Faster implementations of each escaping schema.
392              
393             =back
394              
395              
396              
397              
398             =head1 AUTHOR
399              
400             Mark P Sullivan
401             CPAN ID: msulliva
402             Zeroth Solutions
403              
404              
405             =head1 COPYRIGHT
406              
407             This program is free software; you can redistribute
408             it and/or modify it under the same terms as Perl itself.
409              
410             The full text of the license can be found in the
411             LICENSE file included with this module.
412              
413              
414             =head1 SEE ALSO
415              
416             taint in L, L
417              
418             =cut