File Coverage

blib/lib/Parse/RandGen/Regexp.pm
Criterion Covered Total %
statement 100 150 66.6
branch 33 86 38.3
condition 12 39 30.7
subroutine 9 13 69.2
pod 4 5 80.0
total 158 293 53.9


line stmt bran cond sub pod time code
1             # $Revision: #3 $$Date: 2005/08/31 $$Author: jd150722 $
2             ######################################################################
3             #
4             # This program is Copyright 2003-2005 by Jeff Dutton.
5             #
6             # This program is free software; you can redistribute it and/or modify
7             # it under the terms of either the GNU General Public License or the
8             # Perl Artistic License.
9             #
10             # This program is distributed in the hope that it will be useful,
11             # but WITHOUT ANY WARRANTY; without even the implied warranty of
12             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13             # GNU General Public License for more details.
14             #
15             # If you do not have a copy of the GNU General Public License write to
16             # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
17             # MA 02139, USA.
18             ######################################################################
19              
20             package Parse::RandGen::Regexp;
21              
22             require 5.006_001;
23 4     4   20 use Carp;
  4         8  
  4         247  
24 4     4   22 use Parse::RandGen qw($Debug);
  4         7  
  4         420  
25 4     4   22 use Data::Dumper; # FIX - debug only
  4         7  
  4         162  
26 4     4   3920 use YAPE::Regex;
  4         149997  
  4         42  
27 4     4   1821 use strict;
  4         10  
  4         172  
28 4     4   21 use vars qw(@ISA %_Yterm $Debug);
  4         8  
  4         11516  
29             @ISA = ('Parse::RandGen::Condition');
30              
31             sub _newDerived {
32 5 50   5   19 my $self = shift or confess ("%Error: Cannot call without a valid object!");
33 5         11 my $type = ref($self);
34 5         35 my $elemRef = ref($self->element());
35 5 50       29 ($elemRef eq "Regexp") or confess("%Error: $type has an element that is not a Regexp reference (ref=\"$elemRef\")!");
36              
37             # Implement a RandGen::Rule to represent the complexities of the Regexp
38             # This is only used for pick()ing a matching value for the Regexp...
39 5         23 my $yape = YAPE::Regex->new($self->element());
40 5         211 $yape->parse();
41 5         7240 my $treeArray = $yape->{TREE};
42 5 50       9 ($#{$treeArray} > 0) and die("Found a YAPE::Regex TREE with more than one entry!\n");
  5         30  
43 5 50       30 (ref($$treeArray[0]) eq "YAPE::Regex::group") or die("Found a YAPE::Regex TREE, but its entry is not a group!\n");
44              
45 5         319 $self->{_rule} = Parse::RandGen::Rule->new();
46 5         43 my $prod = Parse::RandGen::Production->new();
47 5         38 $self->{_rule}->addProd($prod);
48 5         216 my $cur = {
49             rule => $self->{_rule},
50             prod => $prod,
51             on => { },
52             off => { i=>1, m=>1, s=>1, x=>1 },
53             };
54 5 100       187 $Data::Dumper::Indent = 1 if $Debug;
55             #print ("Parse::RandGen::Regexp::new(): Getting ready to parse the following Regexp ".$self->element().":\n", Data::Dumper->Dump([$yape])) if $Debug;
56 5         42 $self->_parseRegexp($$treeArray[0], { rule=>$self->{_rule}, prod=>$prod } );
57             #print ("Parse::RandGen::Regexp::new(): Finished parsing the following Regexp ".$self->element()." and now \$self->{_rule} is:\n", $self->{_rule}->dumpHeir(), "\n\n") if $Debug;
58             #print ("Parse::RandGen::Regexp::new(): Finished parsing the following Regexp ".$self->element()." and now \$self->{_rule} is:\n", Data::Dumper->Dump([$self->{_rule}])) if $Debug;
59             }
60              
61             sub dump {
62 0 0   0 0 0 my $self = shift or confess ("%Error: Cannot call without a valid object!");
63 0         0 my $delimiter = "'";
64 0         0 my $output = $self->element();
65 0         0 $output =~ s/($delimiter)/\\$1/gs; # First, escape the delimiter (compiled regex is devoid of a specific delimiter)
66 0         0 $output = "m${delimiter}${output}${delimiter}";
67 0         0 return $output;
68             }
69              
70             sub pick {
71 202 50   202 1 38809 my $self = shift or confess ("%Error: Cannot call without a valid object!");
72 202         913 my %args = ( match=>1, # Default is to pick matching data
73             captures=>{ }, # Captures that are being explicitly specified
74             @_ );
75 202         361 my $vals = { };
76 202         377 foreach my $cap (keys %{$args{captures}}) {
  202         1026  
77 0 0       0 my $ruleRef = $self->capture($cap)
78             or confess("%Error: Regexp::pick(): Unknown capture field ($cap)!\n");
79 0         0 $vals->{$ruleRef} = $args{captures}{$cap};
80             }
81 202         862 delete $args{captures};
82 202         2400 my $val = $self->{_rule}->pick(%args, vals=>$vals);
83 202         319 if (0) {
84             my $elem = $self->element();
85             print ("Parse::RandGen::Regexp($elem)::pick(match=>$args{match}) with value of ", $self->dumpVal($val), "\n");
86             }
87 202         1353 return($val);
88             }
89              
90             sub numCaptures {
91 0 0   0 1 0 my $self = shift or confess ("%Error: Cannot call without a valid object!");
92 0 0       0 return 0 unless defined($self->{_captureList});
93 0         0 my @caps = @{$self->{_captureList}};
  0         0  
94 0         0 return ($#caps + 1);
95             }
96              
97             sub capture {
98 0 0   0 1 0 my $self = shift or confess ("%Error: Cannot call without a valid object!");
99 0         0 my $capture = shift;
100 0 0 0     0 defined($capture) and ($capture =~ m/^(\d+)|([a-z]\w*)$/i)
    0          
101             or confess("%Error: Capture identifier of \"".(defined($capture)?$capture:"[undef]")."\" is not valid!\n");
102 0         0 my $num = $1;
103 0         0 my $name = $2;
104              
105 0 0       0 if (defined($num)) {
106 0         0 my $numCaptures = $self->numCaptures();
107 0 0 0     0 ($num >= 1) and ($num <= $numCaptures)
108             or confess("%Error: Regexp::capture(): Capture number $num is invalid (only captures 1..$numCaptures exist for this Regexp)!\n");
109 0         0 return $self->{_captureList}[$num-1];
110             } else {
111 0 0 0     0 defined($self->{_captureNames}) and defined($self->{_captureNames}{$name})
112             or confess("%Error: Regexp::capture(): Cannot find named capture \"$name\"!\n");
113 0         0 return $self->{_captureNames}{$name};
114             }
115             }
116              
117             sub nameCapture {
118 0 0   0 1 0 my $self = shift or confess ("%Error: Cannot call without a valid object!");
119 0         0 my %args = @_; # "capture# => name" pairs
120 0 0       0 $self->{_captureNames} = { } unless defined($self->{_captureNames});
121 0         0 foreach my $capNum (keys %args) {
122 0 0 0     0 defined($capNum) and ($capNum =~ m/\d+/)
123             or confess("%Error: Regexp::nameCapture(): Capture number specified is invalid ($capNum)!\n");
124 0         0 my $numCaptures = $self->numCaptures();
125 0 0 0     0 ($capNum >= 1) and ($capNum <= $numCaptures)
126             or confess("%Error: Regexp::nameCapture(): Cannot name capture number $capNum (only captures 1..$numCaptures exist for this Regexp)!\n");
127              
128 0         0 my $ruleName = $args{$capNum};
129 0         0 my $rule = $self->{_captureList}[$capNum];
130 0         0 $rule->{_name} = $ruleName; # Name the rule (does not get registered with the grammar - is that OK?)
131 0         0 $self->{_captureNames}{$ruleName} = $rule; # For lookup within the Regexp object via "capture()" function
132             }
133             }
134              
135             # YAPE::Regex elements that are supported as CharClass objects
136             %_Yterm = (
137             "YAPE::Regex::class" => sub{ my $y=shift; return ( $y->{NEG} . $y->{TEXT} ); },
138             "YAPE::Regex::slash" => sub{ my $y=shift; return ($y->text()); },
139             "YAPE::Regex::macro" => sub{ my $y=shift; return ($y->text()); },
140             "YAPE::Regex::oct" => sub{ my $y=shift; return ($y->text()); },
141             "YAPE::Regex::hex" => sub{ my $y=shift; return ($y->text()); },
142             "YAPE::Regex::utf8hex" => sub{ my $y=shift; return ($y->text()); },
143             "YAPE::Regex::ctrl" => sub{ my $y=shift; return ($y->text()); },
144             "YAPE::Regex::named" => sub{ my $y=shift; return ($y->text()); },
145             "YAPE::Regex::any" => sub{ my $y=shift; return ($y->text()); },
146             );
147              
148             sub _parseRegexp {
149 18 50   18   213 my $self = shift or confess ("%Error: Cannot call without a valid object!");
150 18         24 my $yIter = shift; # YAPE::Regex object iterator
151 18 50       39 my $curRef = shift or confess(); # Current position in Condition ($self) object
152 18         465 my %cur = %$curRef; # Make a local copy of current state
153              
154 18         43 my $yType = ref($yIter);
155 18 100       745 if ($yType eq "YAPE::Regex::group") {
156 5         23 foreach my $switch (split //, $yIter->{ON}) { delete $cur{off}{$switch}; $cur{on}{$switch} = 1; }
  0         0  
  0         0  
157 5         23 foreach my $switch (split //, $yIter->{OFF}) { delete $cur{on}{$switch}; $cur{off}{$switch} = 1; }
  20         211  
  20         206  
158             }
159              
160 18 100 100     698 if ( ($yType eq "YAPE::Regex::group")
    50 33        
      33        
161             || ($yType eq "YAPE::Regex::capture") ){
162 8 50       30 defined($yIter->{NGREED}) or confess("$yType type does not have NGREED implemented!\n");
163 8 50       23 defined($yIter->{QUANT}) or confess("$yType type does not have QUANT implemented!\n");
164              
165 8         23 my @yList = @{$yIter->{CONTENT}};
  8         213  
166 8         191 foreach my $elemIter (@yList) {
167 15         30 my $elemType = ref($elemIter);
168 15 100 66     982 if ($elemType eq "YAPE::Regex::alt") {
    100          
169 2         12 $cur{rule}->addProd($cur{prod} = Parse::RandGen::Production->new());
170             } elsif ( ($elemType eq "YAPE::Regex::group")
171             || ($elemType eq "YAPE::Regex::capture") ) {
172              
173 3 50       13 defined($elemIter->{NGREED}) or confess("$elemType type does not have NGREED implemented!\n");
174 3 50       11 defined($elemIter->{QUANT}) or confess("$elemType type does not have QUANT implemented!\n");
175 3         8 my $greedy = !$elemIter->{NGREED};
176 3         6 my $quant = $elemIter->{QUANT};
177              
178 3         13 my $prod = Parse::RandGen::Production->new();
179 3         20 my $rule = Parse::RandGen::Rule->new();
180 3         18 $rule->addProd($prod);
181 3 50       23 if ($elemType eq "YAPE::Regex::capture") {
182 3 50       16 $self->{_captureList} = [ ] unless ($self->{_captureList});
183 3         10 push(@{$self->{_captureList}}, $rule);
  3         8  
184             }
185              
186             #print "Creating a subrule (elem=>$rule, quant=>$quant, greedy=>$greedy)\n" if $Debug;
187 3         44 $cur{prod}->addCond(Parse::RandGen::Subrule->new($rule, quant=>$quant, greedy=>$greedy));
188              
189 3         22 my %next = %cur;
190 3         14 $next{rule} = $rule;
191 3         12 $next{prod} = $prod;
192 3         33 $self->_parseRegexp($elemIter, \%next);
193             } else {
194 10         448 $self->_parseRegexp($elemIter, \%cur);
195             }
196             }
197             } elsif ( ($yType eq "YAPE::Regex::whitespace")
198             || ($yType eq "YAPE::Regex::anchor")
199             || ($yType eq "YAPE::Regex::comment")
200             ){
201             # Do nothing, simply ignore these objects
202             } else {
203 10 50       31 defined($yIter->{NGREED}) or confess("$yType type does not have NGREED implemented!\n");
204 10 50       244 defined($yIter->{QUANT}) or confess("$yType type does not have QUANT implemented!\n");
205 10         331 my $greedy = !$yIter->{NGREED};
206 10         198 my $quant = $yIter->{QUANT};
207 10         16 my @charClasses = ();
208              
209 10 100 66     79 if (($yType eq "YAPE::Regex::text") && $cur{off}{i} && !$quant) {
    50 66        
210 7         81 my $cond = Parse::RandGen::Literal->new($yIter->{TEXT}, greedy => $greedy);
211 7         35 $cur{prod}->addCond($cond);
212             } elsif ($yType eq "YAPE::Regex::alt") {
213 0         0 confess("Not expecting a $yType here!\n");
214             } else {
215 3 50       28 if ($yType eq "YAPE::Regex::text") {
    50          
216             # Case-insensitive text
217 0         0 my $text = $yIter->{TEXT};
218 0         0 for (my $offset=0; $offset < length($text); $offset++) {
219 0         0 my $char = substr($text, $offset, 1);
220 0         0 my $nchar = lc($char);
221 0 0       0 $nchar = uc($char) unless ($nchar ne $char);
222 0 0 0     0 if (($nchar eq $char) || $cur{off}{i}) {
223             #print ("Parse::RandGen::Regexp: creating a case-sensitive CharClass for letter $offset of the literal \"$text\" ([$char])\n");
224 0         0 push @charClasses, "$char";
225             } else {
226             #print ("Parse::RandGen::Regexp: creating a case-insenstive CharClass for letter $offset of the literal \"$text\" ([$char$nchar])\n");
227 0         0 push @charClasses, "$char$nchar";
228             }
229             }
230             } elsif (exists($_Yterm{$yType})) {
231 3         6 @charClasses = ( &{$_Yterm{$yType}}($yIter) );
  3         23  
232             } else {
233 0         0 confess("%Error: YAPE type unknown or unsupported (\"$yType\")!");
234             }
235              
236 3         14 foreach my $cclass (@charClasses) {
237 3         6 my $on = join('', sort(keys(%{$cur{on}})));
  3         14  
238 3         6 my $off = join('', sort(keys(%{$cur{off}})));
  3         32  
239 3         7 my $charClassRE;
240 3 50       11 if ($yType eq "YAPE::Regex::any") {
241 0         0 $charClassRE = qr/(?$on-$off:$cclass)/; # Cannot match the . character in [ ]
242             } else {
243             #print "Parse::RandGen::Regexp: cclass is $cclass\n";
244 3 50 33     43 if (!$on && ($off eq "imsx")) {
245 3         97 $charClassRE = qr/[$cclass]/; # default
246             } else {
247 0         0 $charClassRE = qr/(?$on-$off:[$cclass])/;
248             }
249             #print "Parse::RandGen::Regexp: cclass charClassRE is $charClassRE\n";
250             }
251            
252 3         59 my $cond = Parse::RandGen::CharClass->new($charClassRE, quant=>$quant, greedy=>$greedy);
253 0           $cur{prod}->addCond($cond);
254             }
255             }
256             }
257             }
258              
259             ######################################################################
260             #### Package return
261             1;
262             __END__