File Coverage

blib/lib/HTML/Defang.pm
Criterion Covered Total %
statement 455 485 93.8
branch 267 366 72.9
condition 87 137 63.5
subroutine 23 25 92.0
pod 9 14 64.2
total 841 1027 81.8


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             package HTML::Defang;
4              
5             =head1 NAME
6              
7             HTML::Defang - Cleans HTML as well as CSS of scripting and other executable contents, and neutralises XSS attacks.
8              
9             =head1 SYNOPSIS
10              
11             my $InputHtml = "";
12              
13             my $Defang = HTML::Defang->new(
14             context => $Self,
15             fix_mismatched_tags => 1,
16             tags_to_callback => [ qw(br embed img) ],
17             tags_callback => \&DefangTagsCallback,
18             url_callback => \&DefangUrlCallback,
19             css_callback => \&DefangCssCallback,
20             attribs_to_callback => [ qw(border src) ],
21             attribs_callback => \&DefangAttribsCallback
22             );
23              
24             my $SanitizedHtml = $Defang->defang($InputHtml);
25              
26             # Callback for custom handling specific HTML tags
27             sub DefangTagsCallback {
28             my ($Self, $Defang, $OpenAngle, $lcTag, $IsEndTag, $AttributeHash, $CloseAngle, $HtmlR, $OutR) = @_;
29              
30             # Explicitly defang this tag, even though safe
31             return DEFANG_ALWAYS if $lcTag eq 'br';
32              
33             # Explicitly whitelist this tag, even though unsafe
34             return DEFANG_NONE if $lcTag eq 'embed';
35              
36             # I am not sure what to do with this tag, so process as HTML::Defang normally would
37             return DEFANG_DEFAULT if $lcTag eq 'img';
38             }
39              
40             # Callback for custom handling URLs in HTML attributes as well as style tag/attribute declarations
41             sub DefangUrlCallback {
42             my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $AttributeHash, $HtmlR) = @_;
43              
44             # Explicitly allow this URL in tag attributes or stylesheets
45             return DEFANG_NONE if $$AttrValR =~ /safesite.com/i;
46              
47             # Explicitly defang this URL in tag attributes or stylesheets
48             return DEFANG_ALWAYS if $$AttrValR =~ /evilsite.com/i;
49             }
50              
51             # Callback for custom handling style tags/attributes
52             sub DefangCssCallback {
53             my ($Self, $Defang, $Selectors, $SelectorRules, $Tag, $IsAttr) = @_;
54             my $i = 0;
55             foreach (@$Selectors) {
56             my $SelectorRule = $$SelectorRules[$i];
57             foreach my $KeyValueRules (@$SelectorRule) {
58             foreach my $KeyValueRule (@$KeyValueRules) {
59             my ($Key, $Value) = @$KeyValueRule;
60              
61             # Comment out any '!important' directive
62             $$KeyValueRule[2] = DEFANG_ALWAYS if $Value =~ '!important';
63              
64             # Comment out any 'position: fixed;' declaration
65             $$KeyValueRule[2] = DEFANG_ALWAYS if $Key =~ 'position' && $Value =~ 'fixed';
66             }
67             }
68             $i++;
69             }
70             }
71              
72             # Callback for custom handling HTML tag attributes
73             sub DefangAttribsCallback {
74             my ($Self, $Defang, $lcTag, $lcAttrKey, $AttrValR, $HtmlR) = @_;
75              
76             # Change all 'border' attribute values to zero.
77             $$AttrValR = '0' if $lcAttrKey eq 'border';
78              
79             # Defang all 'src' attributes
80             return DEFANG_ALWAYS if $lcAttrKey eq 'src';
81              
82             return DEFANG_NONE;
83             }
84              
85             =head1 DESCRIPTION
86              
87             This module accepts an input HTML and/or CSS string and removes any executable code including scripting, embedded objects, applets, etc., and neutralises any XSS attacks. A whitelist based approach is used which means only HTML known to be safe is allowed through.
88              
89             HTML::Defang uses a custom HTML tag parser. The parser has been designed and tested to work with nasty real-world HTML and to emulate as closely as possible what browsers actually do with strange-looking constructs. The test suite has been built from examples drawn from a range of sources, such as http://ha.ckers.org/xss.html and http://imfo.ru/csstest/css_hacks/import.php, to ensure that as many XSS attack scenarios as possible have been dealt with.
90              
91             HTML::Defang can make callbacks to client code when it encounters the following:
92              
93             =over 4
94              
95             =item *
96              
97             When a specified tag is parsed
98              
99             =item *
100              
101             When a specified attribute is parsed
102              
103             =item *
104              
105             When a URL is parsed as part of an HTML attribute, or CSS property value.
106              
107             =item *
108              
109             When style data is parsed, as part of an HTML style attribute, or as part of an HTML ") if !$ClosingStyleTagPresent;
1587             }
1588            
1589             # We don't want