File Coverage

blib/lib/Catalyst/Plugin/Params/Demoronize.pm
Criterion Covered Total %
statement 38 38 100.0
branch 7 8 87.5
condition 4 5 80.0
subroutine 8 8 100.0
pod 1 1 100.0
total 58 60 96.6


line stmt bran cond sub pod time code
1             package Catalyst::Plugin::Params::Demoronize;
2             BEGIN {
3 2     2   3737 $Catalyst::Plugin::Params::Demoronize::VERSION = '1.14';
4             }
5              
6 2     2   19 use strict;
  2         80  
  2         79  
7 2     2   11 use warnings;
  2         4  
  2         109  
8 2     2   2234 use utf8;
  2         17  
  2         17  
9              
10             =head1 NAME
11              
12             Catalyst::Plugin::Params::Demoronize - convert common UTF-8 and Windows-1252 characters to their ASCII equivalents
13              
14             =head1 SYNOPSIS
15              
16             # Be sure and use the Unicode plugin if you want to handle Unicode
17             # replacement.
18             use Catalyst qw(Unicode Demoronize);
19              
20             # Optionally enable replacement of common unicode "smart" characters.
21             MyApp->config->{demoronize} = { replace_unicode => 1 }
22              
23             =head1 DESCRIPTION
24              
25             to borrow a few passages from the documentation packaged
26             with john walker's demoronizer.pl:
27              
28             =over 4
29              
30             ...as is usually the case when you encounter something
31             shoddy in the vicinity of a computer, Microsoft incompetence
32             and gratuitous incompatibility were to blame. Western
33             language HTML documents are written in the ISO 8859-1
34             Latin-1 character set, with a specified set of escapes for
35             special characters. Blithely ignoring this prescription, as
36             usual, Microsoft use their own "extension" to Latin-1, in
37             which a variety of characters which do not appear in Latin-1
38             are inserted in the range 0x82 through 0x95--this having the
39             merit of being incompatible with both Latin-1 and Unicode,
40             which reserve this region for additional control
41             characters.
42              
43             These characters include open and close single and double
44             quotes, em and en dashes, an ellipsis and a variety of other
45             things you've been dying for, such as a capital Y umlaut and
46             a florin symbol. Well, okay, you say, if Microsoft want to
47             have their own little incompatible character set, why not?
48             Because it doesn't stop there--in their inimitable fashion
49             (who would want to?)--they aggressively pollute the Web
50             pages of unknowing and innocent victims worldwide with these
51             characters, with the result that the owners of these pages
52             look like semi-literate morons when their pages are viewed
53             on non-Microsoft platforms (or on Microsoft platforms, for
54             that matter, if the user has selected as the browser's font
55             one of the many TrueType fonts which do not include the
56             incompatible Microsoft characters).
57              
58             You see, "state of the art" Microsoft Office applications
59             sport a nifty feature called "smart quotes." (Rule of
60             thumb--every time Microsoft use the word "smart," be on the
61             lookout for something dumb). This feature is on by default
62             in both Word and PowerPoint, and can be disabled only by
63             finding the little box buried among the dozens of
64             bewildering option panels these products contain. If
65             enabled, and you type the string,
66              
67             "Halt," he cried, "this is the police!"
68              
69             "smart quotes" transforms the ASCII quote characters
70             automatically into the incompatible Microsoft opening and
71             closing quotes. ASCII single and double quotes are
72             similarly transformed (even though ASCII already contains
73             apostrophe and single open quote characters), and double
74             hyphens are replaced by the incompatible em dash symbol.
75             What other horrors occur, I know not. If the user notices
76             this happening at all, their reaction might be "Thank you
77             Billy-boy--that looks ever so much nicer," not knowing
78             they've been set up to look like a moron to folks all over
79             the world.
80              
81             =back
82              
83             these characters are commonly inserted into form elements
84             via cut and paste operations. in many cases, they are
85             converted to UTF-8 by the browser. this plugin will replace
86             both the unicode characters AND the Windows-1252 characters
87             with sane ASCII equivalents.
88              
89             =head1 UNICODE
90              
91             Demoronize assumes that you are using L
92             to convert incoming parameters into Unicode characters. If you are
93             not and enable optional C, you may have issues.
94              
95             =head1 CONFIG
96              
97             =head2 replace_unicode
98              
99             If this flag is enabled (it is off by default) then commonly substituted
100             Unicode characters will be converted to their ASCII equivalents.
101              
102             =head2 replace_map
103              
104             A map of Unicode characters and their ASCII equivalents that will be swapped.
105             This can be overridden, but defaults to:
106              
107             =cut
108              
109 2     2   3383 use MRO::Compat;
  2         14655  
  2         79  
110 2     2   3740 use Encode::ZapCP1252;
  2         51288  
  2         2015  
111              
112             =head1 METHODS
113              
114             =over 4
115              
116             =item prepare_parameters
117              
118             Converts parameters.
119              
120             =cut
121              
122             sub prepare_parameters
123             {
124 7     7 1 10889 my $c = shift;
125              
126 7         48 my $retval = $c->maybe::next::method(@_);
127 7         149 my $params = $c->req->params;
128              
129 7         1875 foreach my $key (keys %$params) {
130 7         16 my $ref = \$params->{$key};
131              
132 7         18 for (ref $$ref) {
133 7 100       139 /^$/ && do { $$ref = $c->_demoronize($$ref) };
  6         27  
134 7 100       65 /^ARRAY$/ && do { $$ref = [ map { $c->_demoronize($_) } @$$ref ] };
  1         5  
  3         9  
135             }
136             }
137             }
138              
139             sub _demoronize
140             {
141 9     9   21 my $c = shift;
142 9         22 my $str = shift;
143              
144 9         388 zap_cp1252($str);
145              
146 9   100     515 my $config = $c->config->{'demoronize'} ||= {};
147              
148 9         843 $config->{replace_map} = {
149             '‚' => ',', # 82, SINGLE LOW-9 QUOTATION MARK
150             '„' => ',,', # 84, DOUBLE LOW-9 QUOTATION MARK
151             '…' => '...', # 85, HORIZONTAL ELLIPSIS
152             'ˆ' => '^', # 88, MODIFIER LETTER CIRCUMFLEX ACCENT
153             '‘' => '`', # 91, LEFT SINGLE QUOTATION MARK
154             '’' => "'", # 92, RIGHT SINGLE QUOTATION MARK
155             '“' => '"', # 93, LEFT DOUBLE QUOTATION MARK
156             '”' => '"', # 94, RIGHT DOUBLE QUOTATION MARK
157             '•' => '*', # 95, BULLET
158             '–' => '-', # 96, EN DASH
159             '—' => '-', # 97, EM DASH
160             '‹' => '<', # 8B, SINGLE LEFT-POINTING ANGLE QUOTATION MARK
161             '›' => '>', # 9B, SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
162             };
163              
164 9 100 66     62 if(exists($config->{'replace_unicode'}) && $config->{'replace_unicode'}) {
165              
166 3         4 foreach my $replace (keys(%{ $config->{replace_map} })) {
  3         11  
167 39 50       107 next unless defined($str);
168 39         475 $str =~ s/$replace/$config->{replace_map}->{$replace}/g;
169             }
170             }
171              
172 9         37 return $str;
173             }
174              
175             =back
176              
177             =head1 AUTHOR
178              
179             Mike Eldridge
180              
181             =head1 CONTRIBUTORS
182              
183             =over 4
184              
185             =item * Cory Watson
186              
187             =item * Chisel Wright
188              
189             =item * Michele Beltrame
190              
191             =back
192              
193             =cut
194              
195             1;
196