File Coverage

lib/Text/EscapeDelimiters.pm
Criterion Covered Total %
statement 48 50 96.0
branch 23 24 95.8
condition 3 3 100.0
subroutine 9 10 90.0
pod 5 7 71.4
total 88 94 93.6


line stmt bran cond sub pod time code
1             ###############################################################################
2             # Purpose : Escape delimiter characters within strings
3             # Author : John Alden
4             # Created : Jan 2005
5             # CVS : $Id: EscapeDelimiters.pm,v 1.4 2005/03/20 23:10:53 aldenj20 Exp $
6             ###############################################################################
7              
8             package Text::EscapeDelimiters;
9              
10 1     1   1357 use strict;
  1         2  
  1         29  
11 1     1   4 use Carp;
  1         2  
  1         61  
12 1     1   13 use vars qw($VERSION);
  1         1  
  1         718  
13             $VERSION = sprintf "%d.%03d", (q$Revision: 1.4 $ =~ /: (\d+)\.(\d+)/);
14              
15             sub new {
16 5     5 1 2762 my ($class, $options) = @_;
17 5 100       18 my $self = {
18             'EscapeSequence' => exists $options->{EscapeSequence}? $options->{EscapeSequence} : "\\"
19             };
20 5         18 return bless($self, $class);
21             }
22              
23             sub escape {
24 16     16 1 531 my($self, $string, $delim) = @_;
25 16         53 my $eseq = $self->{EscapeSequence};
26 16 100       32 return $string unless($eseq); #no-op
27            
28 15 100       34 unless(ref $delim eq 'ARRAY') {
29 2 100       7 if(!defined $delim) {$delim = []}
  1 50       1  
  1         185  
30 0         0 elsif(ref $delim) {croak("Delimiter should be scalar or an arrayref")}
31             else {$delim = [$delim]}
32             }
33            
34 14         22 foreach my $char($eseq, @$delim) {
35 40 100 100     155 next unless(defined $char && length($char));
36 38         283 $string =~ s/\Q$char\E/$eseq$char/sg;
37             }
38            
39 14         100 return $string;
40             }
41              
42             sub regex {
43 13     13 1 1610 my($self, $delim) = @_;
44              
45 13         23 TRACE($delim);
46              
47 13 100       28 unless(ref $delim eq 'ARRAY') {
48 12 100       32 if(!defined $delim) {$delim = []}
  2 100       4  
  1         591  
49 9         18 elsif(ref $delim) {croak("Delimiter should be scalar or an arrayref")}
50             else {$delim = [$delim]}
51             }
52              
53 12         25 my $regexp = join("|", map {'(?:' . quotemeta($_) . ')'} @$delim);
  11         28  
54 12 100       29 $regexp = '(?:' . $regexp . ')' if(scalar @$delim > 1);
55 12 100       27 if($self->{EscapeSequence}) {
56 11         23 $regexp = '(?{EscapeSequence}).')'.$regexp; #Use negative look behind
57             }
58              
59 12         27 TRACE("regex = ".$regexp);
60 12         144 return qr/$regexp/;
61             }
62              
63             sub split {
64 9     9 1 1334 my($self, $string, $delim) = @_;
65 9         18 my $regexp = $self->regex($delim);
66 9         25 TRACE("regex = ".$regexp);
67 9         80 return split($regexp, $string);
68             }
69              
70             sub unescape {
71 13     13 1 35 my($self, $string) = @_;
72 13         17 my $eseq = $self->{EscapeSequence};
73 13 100       32 return $string unless($eseq); #no-op
74            
75             #Remove escape characters apart from double-escapes
76 12         67 $string =~ s/\Q$eseq\E(?!\Q$eseq\E)//gs;
77              
78             #Fold double-escapes down to single escapes
79 12         41 $string =~ s/\Q$eseq$eseq\E/$eseq/gs;
80              
81 12         42 return $string;
82             }
83              
84             #Tracing stubs compatible with Log::Trace
85 34     34 0 39 sub TRACE{}
86 0     0 0   sub DUMP{}
87              
88             1;
89              
90             =head1 NAME
91              
92             Text::EscapeDelimiters - escape delimiter characters within strings
93              
94             =head1 SYNOPSIS
95              
96             my $obj = new Text::EscapeDelimiters();
97              
98             #Convert a list of lists into a string using tab and newline as field and record delimiters
99             #Escape any delimiters occurring in the strings first
100             my $stringified = join("\n", map {
101             join("\t", map {$obj->escape($_, ["\t", "\n"])} @$_)
102             } @records);
103              
104             #Convert the string back, respecting the escapes
105             @records = map {
106             [ map {$obj->unescape($_)} $obj->split($_, "\t") ]
107             } $obj->split($stringified, "\n");
108              
109             #Pick off the first 5 records
110             my $delim_regex = $obj->regex("\n");
111             my @first_five;
112             for(1..5) {
113             $stringified =~ /(.*?)$delim_regex/g;
114             push @first_five, [ map {$obj->unescape($_)} $obj->split($1, "\t") ];
115             }
116              
117             =head1 DESCRIPTION
118              
119             When joining strings with a delimiter (aka separator), you need to worry about escaping occurences of that delimiter in the values you are joining.
120             When splitting on the delimiter, you need to respect the escape sequences so you don't split on escaped delimiters.
121              
122             This module provides a solution to that problem allowing you to escape values before you join,
123             split the values whilst respecting escaped delimiters, and finally unescape the data.
124              
125             Escaping is achieved by placing an escape sequence in front of delimiter characters.
126             The default escape sequence is a backslash but you can change this.
127              
128             =over 4
129              
130             =item $obj = new Text::EscapeDelimiters(\%options)
131              
132             Valid options are:
133              
134             =over 4
135              
136             =item EscapeSequence
137              
138             One or more characters that will be used as an escape sequence in front of delimiter characters.
139             If not supplied, defaults to a backslash.
140             An undef or empty string of this key can be used to specify a null escape sequence.
141              
142             =back
143              
144             =item $escaped = $obj->escape($string, $delimiters)
145              
146             Escapes one or more delimiter characters in a string ($delimiters can be a scalar or an an arrayref)
147              
148             =item @list = $obj->split($escaped_and_joined, $delimiter)
149              
150             Splits an escaped string on a delimiter (respecting escaped delimiters)
151              
152             =item $regex = $obj->regex($delimiters)
153              
154             Creates a regular expression that will match delimiters (but not escaped delimiters). $delimiters can be a scalar or an an arrayref.
155              
156             =item $string = $obj->unescape($escaped)
157              
158             Inverse of escape()
159              
160             =back
161              
162             =head1 VERSION
163              
164             See $Text::EscapeDelimiters::VERSION.
165             Last edit: $Revision: 1.4 $ on $Date: 2005/03/20 23:10:53 $
166              
167             =head1 BUGS
168              
169             None known. This module has not yet been used heavily in production so it's not impossible a bug may have slipped through the unit tests.
170             Bug reports are welcome, particularly with patches & test cases.
171              
172             =head1 AUTHOR
173              
174             John Alden
175              
176             =head1 SEE ALSO
177              
178             =over 4
179              
180             =item URI::Escape
181              
182             Escapes/unescapes strings using URI encoding
183              
184             =item Tie::Scalar::Escaped
185              
186             Similar to URI::Escape, but provides a C interface.
187              
188             =item String::Escape
189              
190             Routines for backslash escaping strings
191              
192             =item Regex::Common::delimited
193              
194             Provides regexes for extracting values between PAIRED delimiters (e.g. quotes).
195              
196             =item Text::DelimMatch
197              
198             Module for extracting values between PAIRED delimiters (e.g. quotes).
199             Handles escaped delimiter characters etc.
200              
201             =back
202              
203             =head1 COPYRIGHT AND LICENSE
204              
205             Copyright 2005 by John Alden
206              
207             This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.