File Coverage

blib/lib/String/Smart.pm
Criterion Covered Total %
statement 58 58 100.0
branch 14 16 87.5
condition 13 18 72.2
subroutine 15 15 100.0
pod 7 7 100.0
total 107 114 93.8


line stmt bran cond sub pod time code
1             package String::Smart;
2              
3 2     2   59361 use warnings;
  2         5  
  2         157  
4 2     2   11 use strict;
  2         6  
  2         70  
5 2     2   13 use Carp;
  2         8  
  2         202  
6 2     2   12 use Exporter;
  2         5  
  2         111  
7 2     2   16 use Scalar::Util qw( blessed );
  2         4  
  2         270  
8              
9 2     2   5062 use overload '""' => \&str_val;
  2         2763  
  2         16  
10              
11             =head1 NAME
12              
13             String::Smart - Strings that know how to escape themselves.
14              
15             =head1 VERSION
16              
17             This document describes String::Smart version 0.4
18              
19             =cut
20              
21 2     2   132 use vars qw( $VERSION @ISA @EXPORT_OK %EXPORT_TAGS );
  2         3  
  2         2043  
22              
23             $VERSION = '0.4';
24             @ISA = qw( Exporter );
25             @EXPORT_OK = qw( already as add_rep literal plain rep str_val );
26             %EXPORT_TAGS = ( all => \@EXPORT_OK );
27              
28             my %rep_map = ();
29              
30             =head1 SYNOPSIS
31              
32             use String::Smart;
33             my $plain = "";
34             my $html = as html => "

<This is HTML>

";
35            
36             print as html => $plain, as html => $html;
37             # Prints "<This is plain text>

<This is HTML>

"
38              
39             print plain $html;
40             # Croaks: "Can't decode markup"
41              
42             =head1 DESCRIPTION
43              
44             String::Smart implements overloaded string values that know how they are
45             currently encoded or escaped and are capable of transforming themselves
46             into other encodings.
47              
48             In many applications it is necessary to apply various escaping rules to
49             strings before they can safely be used. For example when building a SQL
50             query string literals must be escaped to avoid SQL injection
51             vulnerabilities.
52              
53             Typically this is achieved by SQL escaping all strings that are passed
54             to the query builder. But what if you pass a string that has already
55             been SQL escaped? Or a string that is URL encoded? If you wish to pass a
56             mixture of already-encoded strings and plain string literals you have to
57             arrange some out of band means of communicating the encoding state of
58             each string.
59              
60             With C you simply make the query building routine
61             ask for SQL escaped strings and behind the scenes the appropriate
62             transformations will be applied to each string based on its
63             current encoding.
64              
65             For example:
66              
67             my $uri_enc = already uri => 'Spaces+are+evil';
68             my $sql_enc = already sql => "\\'Quotes are backslashed\\'";
69             my $not_enc = "Just some literal punctuation: %'+";
70              
71             print literal sql => $uri_enc;
72             # removes URI encoding
73             # applies SQL encoding
74             # prints
75             # Spaces are evil
76              
77             print literal sql => $sql_enc;
78             # already sql encoded
79             # prints
80             # \'Quotes are backslashed\'
81              
82             print literal sql => $not_enc;
83             # applies SQL encoding
84             # prints
85             # Just some literal punctuation: %\'+
86              
87             The important point is that the requested encoding is absolute rather
88             than relative. A C knows how it is currently encoded and
89             can work out how to re-encode itself in the requested way.
90              
91             =head2 A note on the examples
92              
93             Throughout the documentation I assume that various encoding
94             representations (C, C, C) have already been defined.
95             These are not defined by C and must be set up by calling
96             C with the appropriate conversion subroutines before the
97             examples will run.
98              
99             =head1 INTERFACE
100              
101             =head2 C<< add_rep >>
102              
103             Add an encoding representation. The namespace for encodings is global.
104             This may turn out to be a problem - and may therefore change.
105              
106             add_rep reversed => sub { reverse $_[0] }, sub { reverse $_[0] };
107             my $this = "Hello";
108             my $that = reversed "Hello";
109             print as reversed => $this, "\n";
110             # prints "olleH"
111             print as reversed => $that, "\n";
112             # also prints "olleH"
113              
114             A representation consists of a name and two subroutine references. The
115             first subroutine applies the encoding, the second reverses it. If either
116             subroutine is undefined a boilerplate subroutine that throws a
117             descriptive error will be used in its place.
118              
119             =cut
120              
121             sub add_rep($$$) {
122 6     6 1 677 my ( $name, $to, $from ) = @_;
123              
124 6 100       321 croak "$name contains an underscore"
125             if $name =~ /_/;
126              
127 5         15 my %spec = ( from => $from, to => $to );
128 5         15 for my $dir ( keys %spec ) {
129 10 100       29 unless ( defined $spec{$dir} ) {
130             $spec{$dir} = sub {
131 2     2   295 croak "Don't know how to convert $dir $name";
132 2         11 };
133             }
134             }
135              
136 5         22 $rep_map{$name} = \%spec;
137             }
138              
139             =head2 C<< as >>
140              
141             Coerce a string into the specified encoding.
142              
143             my $representation = as html => $some_string;
144              
145             Optionally multiple encodings my be supplied either like this:
146              
147             my $rep = as html_nl2br => $some_string;
148              
149             Or like this:
150              
151             my $rep = as ['html', 'nl2br'], $some_string;
152              
153             The returned object (actually a hash blessed to C)
154             will have the specified encoding irrespective of it's current
155             encoding. For example the sequence:
156              
157             my $html1 = as html => $some_string;
158             my $html2 = as html => $html1;
159              
160             Does I result in double encoding. The encodings you request are
161             'absolute'. A path of transformations that will convert the string from
162             whatever its current encoding is will be computed and applied.
163              
164             =cut
165              
166             sub as($$) {
167 16     16 1 1661 my ( $desired, $str ) = @_;
168              
169             my @desired
170 16 100       58 = map { split /_/ } 'ARRAY' eq ref $desired ? @$desired : $desired;
  11         48  
171              
172 16 100 66     129 unless ( blessed $str && $str->isa( __PACKAGE__ ) ) {
173 6         23 $str = bless { val => $str, rep => [] };
174             }
175              
176 16         38 my @got_rep = $str->rep;
177 16         65 my @want_rep = @desired;
178              
179             # Prune common reps
180 16   100     89 while ( @got_rep && @want_rep && $got_rep[0] eq $want_rep[0] ) {
      100        
181 7         10 shift @got_rep;
182 7         33 shift @want_rep;
183             }
184              
185 16         27 $str = $str->{val};
186              
187 16         65 for my $spec ( [ 'from', reverse @got_rep ], [ 'to', @want_rep ] ) {
188 31         65 my $dir = shift @$spec;
189 31         58 for my $rep ( @$spec ) {
190 21   66     222 my $handler = $rep_map{$rep} || croak "Don't know about $rep";
191 20         57 $str = $handler->{$dir}->( $str );
192             }
193             }
194              
195 13         132 return bless {
196             val => $str,
197             rep => \@desired,
198             };
199             }
200              
201             =head2 C<< already >>
202              
203             Declare that a string is already encoded in a particular way. For example:
204              
205             my $html = already html => '

This is a paragraph

';
206             my $text = 'This is just << some text >>';
207            
208             print literal html => $html;
209             # already HTML encoded
210             # prints
211             #

This is a paragraph

212            
213             print literal html => $text;
214             # applies HTML encoding
215             # prints
216             # This is just << some text >>
217              
218             =cut
219              
220             sub already($$) {
221 2         18 return bless {
222             val => $_[1],
223 2 50   2 1 571 rep => [ map { split /_/ } 'ARRAY' eq ref $_[0] ? @$_[0] : $_[0] ]
224             };
225             }
226              
227             =head2 C<< literal >>
228              
229             Convert a string to the specified encoding and return it as a normal
230             unblessed string.
231              
232             =cut
233              
234 6     6 1 34 sub literal($$) { as( $_[0], $_[1] )->{val} }
235              
236             =head2 C<< plain >>
237              
238             Remove any encoding from a string.
239              
240             my $uri_enc = already uri => 'Spaces+are+evil%21';
241             print plain $uri_enc;
242             # prints
243             # Spaces are evil!
244              
245             =cut
246              
247 5     5 1 19 sub plain($) { literal( [], $_[0] ) }
248              
249             =head2 C<< str_val >>
250              
251             Get the string representation of a C. No encoding
252             coercion takes place; C returns a string encoded according to
253             the current encodings.
254              
255             =cut
256              
257             sub str_val($) {
258 10     10 1 1336 my $str = $_[0];
259 10 50 33     116 blessed $str && $str->isa( __PACKAGE__ ) ? $str->{val} : $str;
260             }
261              
262             =head2 C<< rep >>
263              
264             Return a list of encodings that currently applies to the specfied
265             string.
266              
267             my $text = 'Just text';
268             my @trep = rep $text; # @trep gets ()
269            
270             my $html = already html => '

Boo!

';
271             my @hrep = rep $html; # @hrep gets ( 'html' )
272              
273             =cut
274              
275             sub rep {
276 20     20 1 28 my $str = $_[0];
277 20 100 66     138 if ( blessed $str && $str->isa( __PACKAGE__ ) ) {
278 19         26 my @r = @{ $str->{rep} };
  19         105  
279 19 100       81 return wantarray ? @r : join '_', @r;
280             }
281 1         6 return;
282             }
283              
284             1;
285             __END__