File Coverage

blib/lib/String/MatchInterpolate.pm
Criterion Covered Total %
statement 77 79 97.4
branch 26 30 86.6
condition n/a
subroutine 8 8 100.0
pod 4 4 100.0
total 115 121 95.0


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2007-2010 -- leonerd@leonerd.org.uk
5              
6             package String::MatchInterpolate;
7              
8             our $VERSION = '0.06';
9              
10 6     6   130057 use strict;
  6         14  
  6         195  
11 6     6   30 use warnings;
  6         8  
  6         148  
12              
13 6     6   38 use Carp;
  6         14  
  6         537  
14 6     6   6893 use Text::Balanced qw( extract_delimited );
  6         136403  
  6         6378  
15              
16             =head1 NAME
17              
18             C - named regexp capture and interpolation from the
19             same template.
20              
21             =head1 SYNOPSIS
22              
23             use String::MatchInterpolate;
24              
25             my $smi = String::MatchInterpolate->new( 'My name is ${NAME/\w+/}' );
26              
27             my $vars = $smi->match( "My name is Bob" );
28             my $name = $vars->{NAME};
29              
30             print $smi->interpolate( { NAME => "Jim" } ) . "\n";
31              
32             =head1 DESCRIPTION
33              
34             This module provides an object class which represents a string matching and
35             interpolation pattern. It contains named-variable placeholders which include
36             a regexp pattern to match them on. An instance of this class represents a
37             single pattern, which can be matched against or interpolated into.
38              
39             Objects in this class are not modified once constructed; they do not store
40             any runtime state other than data derived arguments passed to the constructor.
41              
42             =head2 Template Format
43              
44             The template consists of a string with named variable placeholders embedded in
45             it. It looks similar to a perl or shell string with interpolation:
46              
47             A string here with ${NAME/pattern/} interpolations
48              
49             The embedded variable is delmited by perl-style C<${ }> braces, and contains
50             a name and a pattern. The pattern is a normal perl regexp fragment that will
51             be used by the C method. This regexp should not contain any capture
52             brackets C<( )> as these will confuse the parsing logic. If the variable is
53             not named, it will be assigned a name based on its position, starting from 1
54             (i.e. similar to regexp capture buffers). If a variable does not provide a
55             matching pattern but the constructor was given a default with the
56             C option, this will be used instead.
57              
58             Outside of the embedded variables, the string is interpreted literally; i.e.
59             not as a regexp pattern. A backslash C<\> may be used to escape the following
60             character, allowing literal backslashes or dollar signs to be used.
61              
62             The intended use for this object class is that the template strings would come
63             from a configuration file, or some other source of "trusted" input. In the
64             current implementation, there is nothing to stop a carefully-crafted string
65             from containing arbitrary perl code, which would be executed every time the
66             C or C methods are called. (See "SECURITY" section).
67             This fact may be changed in a later version.
68              
69             =head2 Suffices
70              
71             By default, the beginning and end of the string match are both anchored. If
72             the C option is passed to the constructor, then the end of the
73             string is not anchored, and instead, any suffix found by the C method
74             will be returned in a hash key called C<_suffix>. This may be useful, for
75             example, when matching directory names, URLs, or other cases of strings with
76             unconstrained suffices. The C method will not recognise this
77             hash key; instead just use normal string concatenation on the result.
78              
79             my $userhomematch = String::MatchInterpolate->new(
80             '/home/${USER/\w+/}/',
81             allow_suffix => 1
82             );
83              
84             my $vars = $userhomematch->match( "/home/fred/public_html" );
85             print "Need to fetch file $vars->{_suffix} from $vars->{USER}\n";
86              
87             =cut
88              
89             =head1 CONSTRUCTOR
90              
91             =cut
92              
93             =head2 $smi = String::MatchInterpolate->new( $template, %opts )
94              
95             Constructs a new C object that represents the given
96             template and returns it.
97              
98             =over 8
99              
100             =item $template
101              
102             A string containing the template in the format given above
103              
104             =item %opts
105              
106             A hash containing extra options. The following options are recognised:
107              
108             =over 4
109              
110             =item allow_suffix => BOOL
111              
112             A boolean flag. If true, then the end of the string will not be anchored, and
113             instead, an extra suffix will be allowed to follow the matched portion. It
114             will be returned as C<_suffix> by the C method.
115              
116             =item default_re => Regexp or STRING
117              
118             A precompiled Regexp or string defining a regexp to use if a variable does not
119             provide a pattern of its own.
120              
121             =item delimiters => ARRAY of [Regexp or STRING]
122              
123             An array containing two precompliled Regexps or strings, giving the variable
124             openning and closing delimiters. These default to C and C
125             respectively, but by passing other values, other styles of template string may
126             be parsed.
127              
128             delimiters => [ qr/\{/, qr/\}/ ] # To match {name/pattern/}
129              
130             =back
131              
132             =back
133              
134             =cut
135              
136             sub new
137             {
138 12     12 1 2781 my $class = shift;
139 12         32 my ( $template, %opts ) = @_;
140              
141 12         55 my $self = bless {
142             template => $template,
143             vars => [],
144             }, $class;
145              
146 12         17 my %vars;
147              
148 12         19 my $matchpattern = "";
149 12         16 my $varnumber = 0;
150 12         18 my @matchbinds;
151              
152             my @interpparts;
153              
154             # The interpsub closure will contain elements of this array in its
155             # environment
156 0         0 my @literals;
157              
158 1         3 my ( $delim_open, $delim_close ) = $opts{delimiters} ?
159 12 100       78 @{ $opts{delimiters} } :
160             ( qr/\$\{/, qr/\}/ );
161              
162 12         43 while( length $template ) {
163 82 100       409 if( $template =~ s/^$delim_open// ) {
164 40         102 $template =~ s/^(\w*)//;
165 40 100       109 my $var = length $1 ? $1 : ( $varnumber + 1 );
166              
167 40 100       111 croak "Multiple occurances of $var" if exists $vars{$var};
168 39         81 $vars{$var} = 1;
169 39         44 push @{ $self->{vars} }, $var;
  39         110  
170              
171 39         53 my $pattern;
172 39 100       107 if( $template =~ m{^/} ) {
    50          
173 37         111 ( $pattern, $template ) = extract_delimited( $template, "/", '', '' );
174              
175             # Remove delimiting slashes
176 37         2295 s{^/}{}, s{/$}{} for $pattern;
177             }
178             elsif( $opts{default_re} ) {
179 2         4 $pattern = $opts{default_re};
180             }
181             else {
182 0         0 croak "Expected a pattern for $var variable";
183             }
184              
185 39 50       236 $template =~ s/^$delim_close// or croak "Expected $delim_close";
186              
187 39         75 $matchpattern .= "($pattern)";
188 39         113 push @matchbinds, "$var => \$ ". ( $varnumber + 1 );
189 39         70 push @interpparts, "\$_[$varnumber]";
190              
191 39         114 $varnumber++;
192             }
193             else {
194             # Grab up to the next delimiter, or end of the string
195 42         476 $template =~ m/^((?:\\.|[^\\])*?)(?:$|$delim_open)/;
196 42         89 my $literal = $1;
197              
198 42         73 substr( $template, 0, length $literal ) = "";
199              
200             # Unescape
201 42         120 $literal =~ s{\\(.)}{$1}g;
202              
203 42         72 $matchpattern .= quotemeta $literal;
204              
205 42         75 push @literals, $literal;
206 42         253 push @interpparts, "\$literals[$#literals]";
207             }
208             }
209              
210 11 100       32 if( $opts{allow_suffix} ) {
211 1         1 $matchpattern .= "(.*?)";
212 1         3 push @matchbinds, "_suffix => \$" . ( $varnumber + 1 );
213 1         1 $varnumber++;
214             }
215              
216 39         102 my $matchcode = "
217             \$_[0] =~ m{^$matchpattern\$} or return undef;
218             return {
219 11         31 " . join( ",\n", map { " $_" } @matchbinds ) . "
220             }
221             ";
222              
223 11         1498 $self->{matchsub} = eval "sub { $matchcode }";
224 11 50       37 croak $@ if $@;
225              
226 11         15 my $interpcode;
227             # By some benchmark testing, join() seems to be faster than chained concat
228             # after about 10 items. This is likely due to the fact that the result
229             # string only needs allocating once, rather than being incrementally grown.
230             # The call/return overhead of join() itself seems to mask this effect below
231             # that limit.
232 11 100       25 if( @interpparts < 10 ) {
233 10         25 $interpcode = join( " . ", @interpparts );
234             }
235             else {
236 1         10 $interpcode = "join( '', " . join( ", ", @interpparts ) . " )";
237             }
238              
239 11         900 $self->{interpsub} = eval "sub { $interpcode }";
240 11 50       53 croak $@ if $@;
241              
242 11         108 return $self;
243             }
244              
245             =head1 METHODS
246              
247             =cut
248              
249             =head2 @values = $smi->match( $str )
250              
251             =head2 $vars = $smi->match( $str )
252              
253             Attempts to match the given string against the template. In list context it
254             returns a list of the captured variables, or an empty list if the match fails.
255             In scalar context, it returns a HASH reference containing all the captured
256             variables, or undef if the match fails.
257              
258             =cut
259              
260             sub match
261             {
262 17     17 1 2792 my $self = shift;
263 17         25 my ( $str ) = @_;
264              
265 17 100       472 my $vars = $self->{matchsub}->( $str ) or return;
266              
267 13 100       52 return $vars if !wantarray;
268              
269 3         11 my @values = @{$vars}{ $self->vars };
  3         8  
270 3 100       12 push @values, $vars->{_suffix} if exists $vars->{_suffix};
271 3         23 return @values;
272             }
273              
274             =head2 $str = $smi->interpolate( @values )
275              
276             =head2 $str = $smi->interpolate( \%vars )
277              
278             Interpolates the given variable values into the template and returns the
279             generated string. The values may either be given as a list of strings, or in a
280             single HASH reference containing named string values.
281              
282             =cut
283              
284             sub interpolate
285             {
286 7     7 1 1579 my $self = shift;
287 7 100       48 if( ref $_[0] eq "HASH" ) {
288 5         14 return $self->{interpsub}->( @{$_[0]}{ $self->vars } );
  5         142  
289             }
290             else {
291 2         53 return $self->{interpsub}->( @_ );
292             }
293             }
294              
295             =head2 @vars = $smi->vars()
296              
297             Returns the list of variable names defined / used by the template, in the
298             order in which they appear.
299              
300             =cut
301              
302             sub vars
303             {
304 16     16 1 3181 my $self = shift;
305 16         21 return @{ $self->{vars} };
  16         105  
306             }
307              
308             # Keep perl happy; keep Britain tidy
309             1;
310              
311             __END__