File Coverage

blib/lib/Regexp/Bind.pm
Criterion Covered Total %
statement 71 72 98.6
branch 12 14 85.7
condition 8 16 50.0
subroutine 11 11 100.0
pod 0 4 0.0
total 102 117 87.1


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             Regexp::Bind - Bind variables to captured buffers
6              
7             =head1 SYNOPSIS
8              
9             use Regexp::Bind qw(
10             bind global_bind
11             bind_array global_bind_array
12             );
13              
14             $record = bind($string, $regexp, @fields);
15             @record = global_bind($string, $regexp, @fields);
16              
17             $record = bind(\$string, $regexp, @fields);
18             @record = global_bind(\$string, $regexp, @fields);
19              
20             $record = bind_array($string, $regexp);
21             @record = global_bind_array($string, $regexp);
22              
23             $record = bind_array(\$string, $regexp);
24             @record = global_bind_array(\$string, $regexp);
25              
26             $record = bind(\$string, $embedded_regexp);
27             @record = global_bind(\$string, $embedded_egexp);
28              
29              
30             =head1 DESCRIPTION
31              
32             This module is an extension to perl's native regexp function. It binds
33             anonymous hashes or named variables to matched buffers. Both normal
34             regexp syntax and embedded regexp syntax are supported. You can view
35             it as a tiny and petite data extraction system.
36              
37             =head1 FUNCTIONS
38              
39             Two types of function are exported. They bind the given fields to
40             captured contents, and return anonymous hashes/arrayes of the fields.
41              
42             In the following example, you can pass in either a string or a
43             string-reference.
44              
45             =head2 Match the first occurrence
46              
47             use Data::Dumper;
48              
49             =head3 Binding to anonymous hash
50              
51             $record = bind($string, $regexp, qw(field_1 field_2 field_3));
52             print Dumper $record;
53              
54             =head3 Binding to array
55              
56             $record = bind_array($string, $regexp);
57             print $record->[0];
58              
59             =head2 Do global matching and store matched parts in @record
60              
61             =head3 Binding to anonymous hash
62              
63             @record = global_bind($string, $regexp, qw(field_1 field_2 field_3));
64             print Dumper $_ foreach @record;
65              
66             =head3 Binding to array
67              
68             @record = global_bind_array($string, $regexp);
69             print $record[0]->[0];
70              
71             =head1 NAMED VARIABLE BINDING
72              
73             To use named variable binding, please set $Regexp::Bind::USE_NAMED_VAR to non-undef, and then matched parts will be bound to named variables while using bind(). It is not supported for global_bind(), bind_array() and global_bind_array().
74              
75             $Regexp::Bind::USE_NAMED_VAR = 1;
76             bind($string, $regexp, qw(field_1 field_2 field_3));
77             print "$field_1 $field_2 $field_3\n";
78              
79              
80             =head1 EMBEDDED REGEXP
81              
82             Using embedded regexp syntax means you can embed fields right in
83             regexp itself. Its embedded syntax exploits the feature of in-line
84             commenting in regexps.
85              
86             The module first tries to detect if embedded syntax is used. If
87             detected, then comments are stripped and regexp is turned back into a
88             simple one.
89              
90             Using embedded syntax, for the sake of simplicity and legibility,
91             field's name is restricted to B only. bind_array() and
92             global_bind_array() do not support embedded syntax.
93              
94              
95             Example:
96              
97             bind($string, qr'# (?#\w+) (?#\d+)\n'm);
98              
99             is converted into
100              
101             bind($string, qr'# (\w+) (\d+)\n'm);
102              
103             If embedded syntax is detected, further input arguments are ignored. It means that
104              
105             bind($string, qr'# (?#\w+) (?#\d+)\n'm,
106             qw(field_1 field_2));
107              
108             is the same as
109              
110             bind($string, qr'# (?#\w+) (?#\d+)\n'm);
111              
112             and conceptually equal to
113              
114             bind($string, qr'# (\w+) (\d+)\n'm, qw(field_1 field_2));
115              
116              
117              
118             Note that the module simply replaces B<(?#Efield nameE> with
119             B<(> and binds the field's name to buffer. It does not check for
120             syntax correctness, so any fancier usage may crash.
121              
122              
123             =head1 INLINE FILTERING
124              
125             Inline filtering now works with B. Matched parts are
126             saved in $_, and you can do some simple transformation within the
127             brackets before they are exported.
128              
129             bind($string, qr'# (?#{ s/\s+//, $_ }\w+) (?#{ $_*= 10, $_ }\d+)\n'm);
130              
131              
132             =cut
133              
134             package Regexp::Bind;
135              
136 1     1   52889 use Exporter;
  1         3  
  1         98  
137             our @ISA = qw(Exporter);
138             our @EXPORT_OK = qw(bind global_bind bind_array global_bind_array);
139             our $VERSION = '0.05';
140              
141             our $USE_NAMED_VAR = 0;
142 1     1   7 use strict;
  1         2  
  1         41  
143 1     1   5 no strict 'refs';
  1         7  
  1         198  
144              
145             sub _get_fields {
146 16     16   21 my @field;
147 16         712 while($_[0] =~ s,\(\?#<(\w+?)>,(,o){
148 30         173 push @field, $1;
149             }
150 16         56 @field;
151             }
152              
153             sub _get_filters {
154 16     16   24 my @filter;
155             # well, i know, this vulgar pattern doesn't really work for all occasions
156             # i will introduce Text::Balanced with this.
157 16         128 while($_[0] =~ s,(\(\?#(?:<(?:\w+?)>))\{(.+?)\},$1,o){
158 8         92 push @filter, $2;
159             }
160 16         52 (undef, map{eval 'sub { local $_ = shift;'.$_.'};' }@filter);
  8         1236  
161             }
162              
163 1     1   5 use Data::Dumper;
  1         1  
  1         53  
164 1     1   4 use B::Deparse;
  1         2  
  1         1073  
165             sub bind {
166 10   50 10 0 12464 my $string = (ref($_[0]) eq 'SCALAR' ? ${shift()} : shift) || die "No string input";
167 10   50     36 my $regexp = shift || die "No regexp input";
168              
169 10         29 my @filter = _get_filters $regexp;
170 10         34 my @field = _get_fields $regexp;
171 10 100       37 @field = @_ unless @field;
172              
173              
174 10         107 $string =~ m/$regexp/;
175 10         14 my $cnt = 1;
176 10 100       24 if($USE_NAMED_VAR){
177 4         12 my $pkg = (caller)[0];
178 4         10 foreach my $field (@field){
179 12 50       32 my $t = ref($filter[$cnt]) eq 'CODE'? $filter[$cnt]->(${$cnt}) : ${$cnt};
  0         0  
  12         39  
180 12         14 $cnt++;
181 12         13 ${"${pkg}::$field"} = $t;
  12         51  
182             }
183             }
184             else {
185             +{
186 4         108 map{
187 6 100       13 my $t = ref($filter[$cnt]) eq 'CODE'? $filter[$cnt]->(${$cnt}) : ${$cnt};
  18         53  
  14         50  
188 18         31 $cnt++;
189 18         110 $_ => $t;
190             } @field
191             };
192             }
193             }
194              
195             sub bind_array {
196 2   50 2 0 43 my $string = (ref($_[0]) eq 'SCALAR' ? ${shift()} : shift) || die "No string input";
197 2   50     7 my $regexp = shift || die "No regexp input";
198 2         4 my $cnt = 1;
199 2         56 [ ($string =~ m/$regexp/) ];
200             }
201              
202              
203             sub global_bind {
204 6   50 6 0 3212 my $string = (ref($_[0]) eq 'SCALAR' ? ${shift()} : shift) || die "No string input";
205 6   50     19 my $regexp = shift || die "No regexp input";
206              
207 6         17 my @filter = _get_filters $regexp;
208 6         20 my @field = _get_fields $regexp;
209 6 100       22 @field = @_ unless @field;
210              
211 6         9 my @bind;
212             my $cnt;
213 6         72 while($string =~ m/$regexp/g){
214 18         131 $cnt = 1;
215 12         333 push @bind,
216             +{
217             map{
218 18         35 my $t = ref($filter[$cnt]) eq 'CODE'
219 54 100       118 ? $filter[$cnt]->(${$cnt}) : ${$cnt};
  42         139  
220 54         77 $cnt++;
221 54         3334 $_ => $t;
222             } @field
223             };
224             }
225 6 50       115 wantarray ? @bind : \@bind;
226             }
227              
228             sub global_bind_array {
229 2   50 2 0 11 my $string = (ref($_[0]) eq 'SCALAR' ? ${shift()} : shift) || die "No string input";
230 2   50     7 my $regexp = shift || die "No regexp input";
231              
232 2         4 my @bind;
233 2         23 push @bind, [ map{${$_}} 1..$#+ ] while $string =~ m/$regexp/g;
  18         17  
  18         101  
234 2         16 @bind;
235             }
236              
237             1;
238             __END__