| blib/lib/Mojo/DOM58/Entities.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 26 | 26 | 100.0 |
| branch | 6 | 6 | 100.0 |
| condition | 6 | 6 | 100.0 |
| subroutine | 8 | 8 | 100.0 |
| pod | 3 | 3 | 100.0 |
| total | 49 | 49 | 100.0 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package Mojo::DOM58::Entities; | ||||||
| 2 | |||||||
| 3 | 3 | 3 | 53691 | use strict; | |||
| 3 | 12 | ||||||
| 3 | 82 | ||||||
| 4 | 3 | 3 | 14 | use warnings; | |||
| 3 | 3 | ||||||
| 3 | 70 | ||||||
| 5 | 3 | 3 | 20 | use Exporter 'import'; | |||
| 3 | 7 | ||||||
| 3 | 1672 | ||||||
| 6 | |||||||
| 7 | our $VERSION = '2.000'; | ||||||
| 8 | |||||||
| 9 | our @EXPORT_OK = qw(html_attr_unescape html_escape html_unescape); | ||||||
| 10 | |||||||
| 11 | # To generate a new HTML entity table run this command | ||||||
| 12 | # perl examples/entities.pl | ||||||
| 13 | my %ENTITIES; | ||||||
| 14 | for my $line (split "\n", join('', )) { | ||||||
| 15 | next unless $line =~ /^(\S+)\s+U\+(\S+)(?:\s+U\+(\S+))?/; | ||||||
| 16 | $ENTITIES{$1} = defined $3 ? (chr(hex $2) . chr(hex $3)) : chr(hex $2); | ||||||
| 17 | } | ||||||
| 18 | close DATA; | ||||||
| 19 | |||||||
| 20 | # Characters that should be escaped in HTML/XML | ||||||
| 21 | my %ESCAPE = ( | ||||||
| 22 | '&' => '&', | ||||||
| 23 | '<' => '<', | ||||||
| 24 | '>' => '>', | ||||||
| 25 | '"' => '"', | ||||||
| 26 | '\'' => ''' | ||||||
| 27 | ); | ||||||
| 28 | |||||||
| 29 | # HTML entities | ||||||
| 30 | my $ENTITY_RE = qr/&(?:\#((?:[0-9]{1,7}|x[0-9a-fA-F]{1,6}));|(\w+[;=]?))/; | ||||||
| 31 | |||||||
| 32 | sub html_escape { | ||||||
| 33 | 460 | 460 | 1 | 675 | my $str = shift; | ||
| 34 | 460 | 874 | $str =~ s/([&<>"'])/$ESCAPE{$1}/ge; | ||||
| 15 | 50 | ||||||
| 35 | 460 | 1615 | return $str; | ||||
| 36 | } | ||||||
| 37 | |||||||
| 38 | 33024 | 33024 | 1 | 43402 | sub html_attr_unescape { _html(shift, 1) } | ||
| 39 | 994 | 994 | 1 | 2234 | sub html_unescape { _html(shift, 0) } | ||
| 40 | |||||||
| 41 | sub _entity { | ||||||
| 42 | 46 | 46 | 113 | my ($point, $name, $attr) = @_; | |||
| 43 | |||||||
| 44 | # Code point | ||||||
| 45 | 46 | 100 | 166 | return chr($point !~ /^x/ ? $point : hex $point) unless defined $name; | |||
| 100 | |||||||
| 46 | |||||||
| 47 | # Named character reference | ||||||
| 48 | 37 | 63 | my $rest = my $last = ''; | ||||
| 49 | 37 | 69 | while (length $name) { | ||||
| 50 | return $ENTITIES{$name} . reverse $rest | ||||||
| 51 | 75 | 100 | 100 | 382 | if exists $ENTITIES{$name} | ||
| 100 | |||||||
| 52 | && (!$attr || $name =~ /;$/ || $last !~ /[A-Za-z0-9=]/); | ||||||
| 53 | 48 | 91 | $rest .= $last = chop $name; | ||||
| 54 | } | ||||||
| 55 | 10 | 37 | return '&' . reverse $rest; | ||||
| 56 | } | ||||||
| 57 | |||||||
| 58 | sub _html { | ||||||
| 59 | 34018 | 34018 | 45133 | my ($str, $attr) = @_; | |||
| 60 | 34018 | 38055 | $str =~ s/$ENTITY_RE/_entity($1, $2, $attr)/geo; | ||||
| 46 | 94 | ||||||
| 61 | 34018 | 125869 | return $str; | ||||
| 62 | } | ||||||
| 63 | |||||||
| 64 | 1; | ||||||
| 65 | |||||||
| 66 | =encoding utf8 | ||||||
| 67 | |||||||
| 68 | =head1 NAME | ||||||
| 69 | |||||||
| 70 | Mojo::DOM58::Entities - Escape or unescape HTML entities in strings | ||||||
| 71 | |||||||
| 72 | =head1 SYNOPSIS | ||||||
| 73 | |||||||
| 74 | use Mojo::DOM58::Entities qw(html_escape html_unescape); | ||||||
| 75 | |||||||
| 76 | my $str = 'foo & bar'; | ||||||
| 77 | $str = html_unescape $str; # "foo & bar" | ||||||
| 78 | $str = html_escape $str; # "foo & bar" | ||||||
| 79 | |||||||
| 80 | =head1 DESCRIPTION | ||||||
| 81 | |||||||
| 82 | L |
||||||
| 83 | entities for L |
||||||
| 84 | functions are exported on demand. | ||||||
| 85 | |||||||
| 86 | =head1 FUNCTIONS | ||||||
| 87 | |||||||
| 88 | =head2 html_attr_unescape | ||||||
| 89 | |||||||
| 90 | my $str = html_attr_unescape $escaped; | ||||||
| 91 | |||||||
| 92 | Same as L"html_unescape">, but handles special rules from the | ||||||
| 93 | L for HTML attributes. | ||||||
| 94 | |||||||
| 95 | # "foo=bar<est=baz" | ||||||
| 96 | html_attr_unescape 'foo=bar<est=baz'; | ||||||
| 97 | |||||||
| 98 | # "foo=bar | ||||||
| 99 | html_attr_unescape 'foo=bar<est=baz'; | ||||||
| 100 | |||||||
| 101 | =head2 html_escape | ||||||
| 102 | |||||||
| 103 | my $escaped = html_escape $str; | ||||||
| 104 | |||||||
| 105 | Escape unsafe characters C<&>, C<< < >>, C<< > >>, C<">, and C<'> in string. | ||||||
| 106 | |||||||
| 107 | html_escape ' '; # "<div>" |
||||||
| 108 | |||||||
| 109 | =head2 html_unescape | ||||||
| 110 | |||||||
| 111 | my $str = html_unescape $escaped; | ||||||
| 112 | |||||||
| 113 | Unescape all HTML entities in string, according to the | ||||||
| 114 | L. | ||||||
| 115 | |||||||
| 116 | html_unescape '<div>'; # " " |
||||||
| 117 | |||||||
| 118 | =head1 BUGS | ||||||
| 119 | |||||||
| 120 | Report issues related to the format of this distribution or Perl 5.8 support to | ||||||
| 121 | the public bugtracker. Any other issues should be reported directly to the | ||||||
| 122 | upstream L |
||||||
| 123 | |||||||
| 124 | =head1 AUTHOR | ||||||
| 125 | |||||||
| 126 | Dan Book |
||||||
| 127 | |||||||
| 128 | Code and tests adapted from L |
||||||
| 129 | L |
||||||
| 130 | |||||||
| 131 | =head1 COPYRIGHT AND LICENSE | ||||||
| 132 | |||||||
| 133 | Copyright (c) 2008-2016 Sebastian Riedel and others. | ||||||
| 134 | |||||||
| 135 | Copyright (c) 2016 Dan Book for adaptation to standalone format. | ||||||
| 136 | |||||||
| 137 | This is free software, licensed under: | ||||||
| 138 | |||||||
| 139 | The Artistic License 2.0 (GPL Compatible) | ||||||
| 140 | |||||||
| 141 | =head1 SEE ALSO | ||||||
| 142 | |||||||
| 143 | L |
||||||
| 144 | |||||||
| 145 | =cut | ||||||
| 146 | |||||||
| 147 | __DATA__ |