File Coverage

blib/lib/Catalyst/Plugin/HTML/Scrubber.pm
Criterion Covered Total %
statement 58 63 92.0
branch 34 46 73.9
condition 6 6 100.0
subroutine 11 11 100.0
pod 1 3 33.3
total 110 129 85.2


line stmt bran cond sub pod time code
1             package Catalyst::Plugin::HTML::Scrubber;
2             $Catalyst::Plugin::HTML::Scrubber::VERSION = '0.05';
3 5     5   9184645 use Moose;
  5         58  
  5         56  
4 5     5   39752 use namespace::autoclean;
  5         12  
  5         61  
5              
6             with 'Catalyst::ClassData';
7              
8 5     5   504 use MRO::Compat;
  5         12  
  5         162  
9 5     5   3192 use HTML::Scrubber;
  5         15286  
  5         4471  
10              
11             __PACKAGE__->mk_classdata('_scrubber');
12              
13             sub setup {
14 4     4 1 1276578 my $c = shift;
15              
16 4         37 my $conf = $c->config->{scrubber};
17 4 50       578 if (ref $conf eq 'ARRAY') {
    100          
18 0         0 $c->_scrubber(HTML::Scrubber->new(@$conf));
19             } elsif (ref $conf eq 'HASH') {
20             $c->config->{scrubber}{auto} = 1
21 3 50       24 unless defined $c->config->{scrubber}{auto};
22 3         256 $c->_scrubber(HTML::Scrubber->new(@{$conf->{params}}));
  3         35  
23             } else {
24 1         10 $c->_scrubber(HTML::Scrubber->new());
25             }
26              
27 4         938 return $c->maybe::next::method(@_);
28             }
29              
30             sub execute {
31 117     117 0 622524 my $c = shift;
32              
33 117         575 $c->maybe::next::method(@_);
34              
35 117         71765 my $conf = $c->config->{scrubber};
36              
37             # There are two ways to configure the plugin, it seems; giving a hashref
38             # of params under `scrubber`, with any params intended for HTML::Scrubber
39             # under the vaguely-named `params` key, or an arrayref of params intended
40             # to be passed straight to HTML::Scrubber - save html_scrub() from knowing
41             # about that by abstracting that nastyness away:
42 117 100 100     11076 if (ref $conf ne 'HASH' || $conf->{auto}) {
43 81 100       321 $c->html_scrub(ref($conf) eq 'HASH' ? $conf : {});
44             }
45             }
46              
47             sub html_scrub {
48 83     83 0 597 my ($c, $conf) = @_;
49              
50             # If there's body_data - for e.g. a POSTed JSON body that was decoded -
51             # then we need to walk through it, scrubbing as appropriate; don't call
52             # body_data unless the content type is one there's a data handler for
53             # though, otherwise we'll trigger an exception (see GH#4)
54 83 100       233 if (exists $c->req->data_handlers->{ $c->req->content_type }) {
55 65 50       11016 if (my $body_data = $c->request->body_data) {
56 65         19770 $c->_scrub_recurse($conf, $c->request->body_data);
57             }
58             }
59              
60             # And if Catalyst::Controller::REST is in use so we have $req->data,
61             # then scrub that too
62 83 100       10720 if ($c->request->can('data')) {
63 9         282 my $data = $c->request->data;
64 9 50       345 if ($data) {
65 9         189 $c->_scrub_recurse($conf, $c->request->data);
66             }
67             }
68              
69             # Normal query/POST body parameters:
70 83         3250 $c->_scrub_recurse($conf, $c->request->parameters);
71              
72             }
73              
74             # Recursively scrub param values...
75             sub _scrub_recurse {
76 205     205   7183 my ($c, $conf, $data) = @_;
77              
78             # If the thing we've got is a hashref, walk over its keys, checking
79             # whether we should ignore, otherwise, do the needful
80 205 100       561 if (ref $data eq 'HASH') {
    50          
    0          
81 181         659 for my $key (keys %$data) {
82 232 100       5513 if (!$c->_should_scrub_param($conf, $key)) {
83 48         142 next;
84             }
85              
86             # OK, it's fine to fettle with this key - if its value is
87             # a ref, recurse, otherwise, scrub
88 184 100       511 if (my $ref = ref $data->{$key}) {
89             $c->_scrub_recurse($conf, $data->{$key})
90 48 50       200 if defined $data->{$key};
91             } else {
92             # Alright, non-ref value, so scrub it
93             # FIXME why did we have to have this ref-ref handling fun?
94             #$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
95             $data->{$key} = $c->_scrubber->scrub($data->{$key})
96 136 50       521 if defined $data->{$key};
97             }
98             }
99             } elsif (ref $data eq 'ARRAY') {
100 24         53 for (@$data) {
101 48 50       2167 if (ref $_) {
102 0         0 $c->_scrub_recurse($conf, $_);
103             } else {
104 48 50       148 $_ = $c->_scrubber->scrub($_) if defined $_;
105             }
106             }
107             } elsif (ref $data eq 'CODE') {
108 0         0 $c->log->debug("Can't scrub a coderef!");
109             } else {
110             # This shouldn't happen, as we should always start with a ref,
111             # and non-ref hash/array values should have been handled above.
112 0         0 $c->log->debug("Non-ref to scrub - should this happen?");
113             }
114             }
115              
116             sub _should_scrub_param {
117 232     232   464 my ($c, $conf, $param) = @_;
118             # If we only want to operate on certain params, do that checking
119             # now...
120 232 100 100     938 if ($conf && $conf->{ignore_params}) {
121 192         625 my $ignore_params = $c->config->{scrubber}{ignore_params};
122 192 50       16107 if (ref $ignore_params ne 'ARRAY') {
123 0         0 $ignore_params = [ $ignore_params ];
124             }
125 192         407 for my $ignore_param (@$ignore_params) {
126 348 100       738 if (ref $ignore_param eq 'Regexp') {
127 192 100       950 return if $param =~ $ignore_param;
128             } else {
129 156 100       377 return if $param eq $ignore_param;
130             }
131             }
132             }
133              
134             # If we've not bailed above, we didn't match any ignore_params
135             # entries, or didn't have any, so we do want to scrub
136 184         443 return 1;
137             }
138              
139              
140             # Incredibly nasty monkey-patch to rewind filehandle before parsing - see
141             # https://github.com/perl-catalyst/catalyst-runtime/pull/186
142             # First, get the default handlers hashref:
143             my $default_data_handlers = Catalyst->default_data_handlers();
144              
145             # Wrap the coderef for application/json in one that rewinds the filehandle
146             # first:
147             my $orig_json_handler = $default_data_handlers->{'application/json'};
148             $default_data_handlers->{'application/json'} = sub {
149             $_[0]->seek(0,0); # rewind $fh arg
150             $orig_json_handler->(@_);
151             };
152              
153              
154             {
155             # and now replace the original default_data_handlers() with a version that
156             # returns our modified handlers
157 5     5   46 no warnings 'redefine';
  5         22  
  5         535  
158             *Catalyst::default_data_handlers = sub {
159 4     4   10553 return $default_data_handlers;
160             };
161             }
162              
163             __PACKAGE__->meta->make_immutable;
164              
165             1;
166             __END__
167              
168              
169             =head1 NAME
170              
171             Catalyst::Plugin::HTML::Scrubber - Catalyst plugin for scrubbing/sanitizing incoming parameters
172              
173             =head1 SYNOPSIS
174              
175             use Catalyst qw[HTML::Scrubber];
176              
177             MyApp->config(
178             scrubber => {
179             auto => 1, # automatically run on request
180             ignore_params => [ qr/_html$/, 'article_body' ],
181            
182             # The following are options to HTML::Scrubber
183             params => [
184             default => 0,
185             comment => 0,
186             script => 0,
187             process => 0,
188             allow => [qw [ br hr b a h1]],
189             ],
190             },
191             );
192              
193             =head1 DESCRIPTION
194              
195             On request, sanitize HTML tags in all params (with the ability to exempt
196             some if needed), to protect against XSS (cross-site scripting) attacks and
197             other unwanted things.
198              
199              
200             =head1 EXTENDED METHODS
201              
202             =over 4
203              
204             =item setup
205              
206             See SYNOPSIS for how to configure the plugin, both with its own configuration
207             (e.g. whether to automatically run, whether to exempt certain fields) and
208             passing on any options from L<HTML::Scrubber> to control exactly what
209             scrubbing happens.
210              
211             =item dispatch
212              
213             Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) -
214             this includes normal POST params, and serialised data (e.g. a POSTed JSON body)
215             accessed via `$c->req->body_data` or `$c->req->data`.
216              
217             =back
218              
219             =head1 SEE ALSO
220              
221             L<Catalyst>, L<HTML::Scrubber>.
222              
223             =head1 AUTHOR
224              
225             Hideo Kimura, << <hide@hide-k.net> >> original author
226              
227             David Precious (BIGPRESH), C<< <davidp@preshweb.co.uk> >> maintainer since 2023-07-17
228              
229             =head1 COPYRIGHT AND LICENSE
230              
231             Copyright (C) 2005 by Hideo Kimura
232              
233             This library is free software; you can redistribute it and/or modify
234             it under the same terms as Perl itself.
235              
236             =cut