File Coverage

blib/lib/Catalyst/Plugin/HTML/Scrubber.pm
Criterion Covered Total %
statement 64 69 92.7
branch 37 50 74.0
condition 6 6 100.0
subroutine 12 12 100.0
pod 1 3 33.3
total 120 140 85.7


line stmt bran cond sub pod time code
1             package Catalyst::Plugin::HTML::Scrubber;
2             $Catalyst::Plugin::HTML::Scrubber::VERSION = '0.06';
3 5     5   9063877 use Moose;
  5         57  
  5         45  
4 5     5   36927 use namespace::autoclean;
  5         15  
  5         47  
5              
6             with 'Catalyst::ClassData';
7              
8 5     5   491 use MRO::Compat;
  5         18  
  5         162  
9 5     5   3205 use HTML::Scrubber;
  5         15547  
  5         4990  
10              
11             __PACKAGE__->mk_classdata('_scrubber');
12              
13             sub setup {
14 4     4 1 1275495 my $c = shift;
15              
16 4         39 my $conf = $c->config->{scrubber};
17 4 50       548 if (ref $conf eq 'ARRAY') {
    100          
18 0         0 $c->_scrubber(HTML::Scrubber->new(@$conf));
19             } elsif (ref $conf eq 'HASH') {
20             $c->config->{scrubber}{auto} = 1
21 3 50       17 unless defined $c->config->{scrubber}{auto};
22 3         265 $c->_scrubber(HTML::Scrubber->new(@{$conf->{params}}));
  3         40  
23             } else {
24 1         9 $c->_scrubber(HTML::Scrubber->new());
25             }
26              
27 4         961 return $c->maybe::next::method(@_);
28             }
29              
30             sub execute {
31 129     129 0 672751 my $c = shift;
32              
33 129         625 $c->maybe::next::method(@_);
34              
35 129         72470 my $conf = $c->config->{scrubber};
36              
37             # There are two ways to configure the plugin, it seems; giving a hashref
38             # of params under `scrubber`, with any params intended for HTML::Scrubber
39             # under the vaguely-named `params` key, or an arrayref of params intended
40             # to be passed straight to HTML::Scrubber - save html_scrub() from knowing
41             # about that by abstracting that nastyness away:
42 129 100 100     11946 if (ref $conf ne 'HASH' || $conf->{auto}) {
43 93 100       379 $c->html_scrub(ref($conf) eq 'HASH' ? $conf : {});
44             }
45             }
46              
47             sub html_scrub {
48 95     95 0 673 my ($c, $conf) = @_;
49              
50             # If there's body_data - for e.g. a POSTed JSON body that was decoded -
51             # then we need to walk through it, scrubbing as appropriate; don't call
52             # body_data unless the content type is one there's a data handler for
53             # though, otherwise we'll trigger an exception (see GH#4)
54 95 100       265 if (exists $c->req->data_handlers->{ $c->req->content_type }) {
55 77 50       13021 if (my $body_data = $c->request->body_data) {
56 77         22696 $c->_scrub_recurse($conf, $c->request->body_data);
57             }
58             }
59              
60             # And if Catalyst::Controller::REST is in use so we have $req->data,
61             # then scrub that too
62 95 100       5076 if ($c->request->can('data')) {
63 9         293 my $data = $c->request->data;
64 9 50       337 if ($data) {
65 9         182 $c->_scrub_recurse($conf, $c->request->data);
66             }
67             }
68              
69             # Normal query/POST body parameters:
70 95         2813 $c->_scrub_recurse($conf, $c->request->parameters);
71              
72             }
73              
74             # Recursively scrub param values...
75             sub _scrub_recurse {
76 229     229   8302 my ($c, $conf, $data) = @_;
77              
78             # If the thing we've got is a hashref, walk over its keys, checking
79             # whether we should ignore, otherwise, do the needful
80 229 100       608 if (ref $data eq 'HASH') {
    50          
    0          
81 205         708 for my $key (keys %$data) {
82 256 100       637 if (!$c->_should_scrub_param($conf, $key)) {
83 48         143 next;
84             }
85              
86             # OK, it's fine to fettle with this key - if its value is
87             # a ref, recurse, otherwise, scrub
88 208 100       586 if (my $ref = ref $data->{$key}) {
89             $c->_scrub_recurse($conf, $data->{$key})
90 48 50       168 if defined $data->{$key};
91             } else {
92             # Alright, non-ref value, so scrub it
93             # FIXME why did we have to have this ref-ref handling fun?
94             #$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
95             $data->{$key} = $c->_scrub_value($conf, $data->{$key})
96 160 50       563 if defined $data->{$key};
97             }
98             }
99             } elsif (ref $data eq 'ARRAY') {
100 24         51 for (@$data) {
101 48 50       95 if (ref $_) {
102 0         0 $c->_scrub_recurse($conf, $_);
103             } else {
104 48 50       134 $_ = $c->_scrub_value($conf, $_) if defined $_;
105             }
106             }
107             } elsif (ref $data eq 'CODE') {
108 0         0 $c->log->debug("Can't scrub a coderef!");
109             } else {
110             # This shouldn't happen, as we should always start with a ref,
111             # and non-ref hash/array values should have been handled above.
112 0         0 $c->log->debug("Non-ref to scrub - should this happen?");
113             }
114             }
115              
116              
117             # Wrap HTML::Scrubber's scrub() so we can decode HTML entities if needed
118             sub _scrub_value {
119 208     208   461 my ($c, $conf, $value) = @_;
120              
121 208 50       434 return $value unless defined $value;
122            
123 208         562 $value = $c->_scrubber->scrub($value);
124              
125 208 100       20936 if ($conf->{no_encode_entities}) {
126 12         63 $value = HTML::Entities::decode_entities($value);
127             }
128 208         818 return $value;
129             }
130              
131             sub _should_scrub_param {
132 256     256   504 my ($c, $conf, $param) = @_;
133             # If we only want to operate on certain params, do that checking
134             # now...
135 256 100 100     1115 if ($conf && $conf->{ignore_params}) {
136 216         642 my $ignore_params = $c->config->{scrubber}{ignore_params};
137 216 50       18234 if (ref $ignore_params ne 'ARRAY') {
138 0         0 $ignore_params = [ $ignore_params ];
139             }
140 216         504 for my $ignore_param (@$ignore_params) {
141 396 100       866 if (ref $ignore_param eq 'Regexp') {
142 216 100       1095 return if $param =~ $ignore_param;
143             } else {
144 180 100       446 return if $param eq $ignore_param;
145             }
146             }
147             }
148              
149             # If we've not bailed above, we didn't match any ignore_params
150             # entries, or didn't have any, so we do want to scrub
151 208         505 return 1;
152             }
153              
154              
155             # Incredibly nasty monkey-patch to rewind filehandle before parsing - see
156             # https://github.com/perl-catalyst/catalyst-runtime/pull/186
157             # First, get the default handlers hashref:
158             my $default_data_handlers = Catalyst->default_data_handlers();
159              
160             # Wrap the coderef for application/json in one that rewinds the filehandle
161             # first:
162             my $orig_json_handler = $default_data_handlers->{'application/json'};
163             $default_data_handlers->{'application/json'} = sub {
164             $_[0]->seek(0,0); # rewind $fh arg
165             $orig_json_handler->(@_);
166             };
167              
168              
169             {
170             # and now replace the original default_data_handlers() with a version that
171             # returns our modified handlers
172 5     5   61 no warnings 'redefine';
  5         22  
  5         536  
173             *Catalyst::default_data_handlers = sub {
174 4     4   10734 return $default_data_handlers;
175             };
176             }
177              
178             __PACKAGE__->meta->make_immutable;
179              
180             1;
181             __END__
182              
183              
184             =head1 NAME
185              
186             Catalyst::Plugin::HTML::Scrubber - Catalyst plugin for scrubbing/sanitizing incoming parameters
187              
188             =head1 SYNOPSIS
189              
190             use Catalyst qw[HTML::Scrubber];
191              
192             MyApp->config(
193             scrubber => {
194             auto => 1, # automatically run on request
195             ignore_params => [ qr/_html$/, 'article_body' ],
196              
197             # HTML::Scrubber will HTML-encode some chars, e.g. angle
198             # brackets. If you don't want that, enable this setting and
199             # the scrubbed values will be unencoded.
200             no_decode_entities => 0,
201            
202             # The following are options to HTML::Scrubber
203             params => [
204             default => 0,
205             comment => 0,
206             script => 0,
207             process => 0,
208             allow => [qw [ br hr b a h1]],
209             ],
210             },
211             );
212              
213             =head1 DESCRIPTION
214              
215             On request, sanitize HTML tags in all params (with the ability to exempt
216             some if needed), to protect against XSS (cross-site scripting) attacks and
217             other unwanted things.
218              
219              
220             =head1 EXTENDED METHODS
221              
222             =over 4
223              
224             =item setup
225              
226             See SYNOPSIS for how to configure the plugin, both with its own configuration
227             (e.g. whether to automatically run, whether to exempt certain fields) and
228             passing on any options from L<HTML::Scrubber> to control exactly what
229             scrubbing happens.
230              
231             =item dispatch
232              
233             Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) -
234             this includes normal POST params, and serialised data (e.g. a POSTed JSON body)
235             accessed via `$c->req->body_data` or `$c->req->data`.
236              
237             =back
238              
239             =head1 SEE ALSO
240              
241             L<Catalyst>, L<HTML::Scrubber>.
242              
243             =head1 AUTHOR
244              
245             Hideo Kimura, << <hide@hide-k.net> >> original author
246              
247             David Precious (BIGPRESH), C<< <davidp@preshweb.co.uk> >> maintainer since 2023-07-17
248              
249             =head1 COPYRIGHT AND LICENSE
250              
251             Copyright (C) 2005 by Hideo Kimura
252              
253             This library is free software; you can redistribute it and/or modify
254             it under the same terms as Perl itself.
255              
256             =cut