File Coverage

blib/lib/Regexp/Match/List.pm
Criterion Covered Total %
statement 18 76 23.6
branch 0 16 0.0
condition 0 11 0.0
subroutine 6 18 33.3
pod 1 7 14.2
total 25 128 19.5


line stmt bran cond sub pod time code
1             package Regexp::Match::List;
2              
3             # $Id: List.pm,v 1.1.1.1.8.3 2004/04/29 01:45:31 dgrant Exp $
4              
5             # IDEA: allow match() to skip regexps below a certain hitrate.
6             # IDEA: use qr// to precompile regexps
7              
8              
9 1     1   310397 use strict;
  1         3  
  1         49  
10 1     1   6 use warnings;
  1         2  
  1         45  
11              
12 1     1   5 use base qw( Class::Base );
  1         91  
  1         1273  
13              
14 1     1   2821 use Data::Sorting qw( :basics :arrays );
  1         6344  
  1         223  
15 1     1   1332 use Data::Dumper;
  1         11613  
  1         86  
16              
17 1     1   9 use vars qw($VERSION %CONF);
  1         2  
  1         1153  
18              
19             $VERSION = 0.50;
20              
21             %CONF =
22             # CONFIGURATION -- This configuation is loaded into $self via load_args()
23             (
24             # INTERNAL DEFAULTS (can be touched externally)
25             USESTUDY => 1, # use "study STRING;" for regexp strings
26             OPCHECK => 50, # Num of match() calls before calling optimize()
27             OPSKIP => 0, # Skip optimize() ?
28             OPWEIGHT => 1, # Default regexp hit weight
29             OPHITS => 0, # Default regexp hits
30             OPSORTCONF => # Data::Sorting Sort Rules. Used in optimize()
31             [ # The hashlike syntax is to get around some issue
32             # in Data::Sorting that wouldn't let me use a hashref
33             -compare => 'numeric',
34             -order => 'reverse',
35             -sortkey => sub { $_[0]->{'hits'} * $_[0]->{'weight'} }
36             ],
37            
38             # INTERNAL STRUCTURE (cannot be touched externally)
39             '_RE' => [], # Store regexps in arrayref. See add()
40             '_COUNT' => # Number of times a function has been called
41             {
42             match => 0,
43             optimize => 0
44             },
45             );
46            
47              
48              
49             sub match($$)
50             # PUBLIC METHOD
51             # Test a string for all available regular expressions.
52             #
53             {
54 0     0 0   my $self = shift;
55 0           my ($string) = @_;
56 0           my ($RE, $test, @results);
57            
58             # A possible regexp optimization. see % perldoc -f study
59 0 0         study $string if ($self->{'USESTUDY'});
60              
61 0           REGEXP:
62 0           for my $i (0..$#{ $self->{'_RE'} })
63             # Iterate through all regular expressions.
64             # This uses a for() b/c it allows for more control
65             # than Set::Array::foreach() (we can escape on a match)
66             {
67 0           $self->_increment(); # $self->{'_COUNT'}{'match'}++
68 0           $self->optimize(); # which is used by optimize()
69            
70 0           $RE = $self->{'_RE'}->[$i]; # The current regular expression
71              
72             # Execute the regular expression in list context and
73             # store the results ($1 .. $n) in an array
74 0           @results = ($string =~ $RE->{'test'});
75              
76 0           $self->debug("STRING:$string\n");
77 0           $self->debug("TEST:$RE->{'test'}\n");
78 0           $self->debug("RESULTS:", (scalar(@results)), '-', join(',', @results), "\n\n");
79            
80 0 0         if ($RE->callback(@results))
81             # A successful match may not be enough for a positive
82             # result depending on the outcome of the callback which
83             # is entirely out of Regexp::Match::List's control.
84             # When it is, we acknowledge and reward a successful
85             # regular expression, then bust out of this hellish loop.
86             {
87 0           $RE->count_hit(); # $RE->{'hits'}++
88 0           last REGEXP; # Bust out
89             }
90             }
91            
92             #print Dumper($RE, @results);
93            
94 0 0         return ($results[0])
95             ? ($RE, @results)
96             : ();
97             }
98              
99             sub add(\%)
100             # PUBLIC METHOD
101             # Add a regular expression to the mix.
102             # IN: (scalar) regular expression w/o '/' (i.e. '^.+?\s$')
103             # [(scalar) multiplier for hits, used by optimize() ]
104             # OUT: Whatever Set::Array::push() returns
105             {
106 0     0 0   my $self = shift;
107 0           my %re = @_;
108            
109 0           $self->check_re_conf(\%re);
110            
111 0   0       $re{'weight'} ||= $self->{'OPWEIGHT'};
112 0   0       $re{'hits'} ||= $self->{'OPHITS'};
113            
114 0           push (@{ $self->{'_RE'} }, Regexp::Match::List::RE->create(%re));
  0            
115             }
116              
117             sub check_re_conf(\%)
118             # Determine whether the given hashref contains all the information
119             # required to create a regexp entry in $self->{'RE'}
120             # TODO: complete check_re_conf()
121             {
122 0     0 0   my $self = shift;
123 0           return 1;
124             }
125              
126             sub optimize()
127             # PUBLIC METHOD, USED INTERNALLY
128             # Sort Set::Array object of regular expressions by # of times
129             # match() is called. This will run only when match() has been called
130             # a multiple of $self->{'OPCHECK'} times
131             {
132 0     0 0   my $self = shift;
133 0           my $cnt_match = $self->_count('match');
134            
135             # We only optimize when...
136             return if (
137             # we are told allowed to, and when...
138 0 0 0       ($self->{'OPSKIP'} == 1) ||
139             # the iteration counter reaches a multiple of $self->{'OPCHECK'}
140             (($cnt_match % $self->{'OPCHECK'}) > 1)
141             );
142            
143             # Count up a hit for this function only when we actually resort
144             # This information is only useful for reference
145 0           $self->_increment(); # $self->{'_COUNT'}{'optimize'}++
146            
147 0           $self->debug("optimize(): running at match() call #$cnt_match\n\n");
148            
149             # Sort using Data::Sorting. $self->{'OPSORTCONF'} contains a
150             # sort rule configuration.
151 0           sort_arrayref($self->{'_RE'}, @{ $self->{'OPSORTCONF'} });
  0            
152              
153             }
154              
155              
156              
157              
158              
159              
160             # EXTREMELY PRIVATE METHODS
161             # Haha. Philstrdamous, I know you love this one.
162             # Increments a counter by one. The particular counter is determined
163             # by the calling function. i.e. $self->{'_COUNT'}{'optimize'}++
164 0     0     sub _increment() { $_[0]->{'_COUNT'}{ (split '::', (caller(1))[3])[3] }++ }
165             # Returns the value of the counter for the given function
166 0     0     sub _count() { $_[0]->{'_COUNT'}{$_[1]} }
167              
168              
169              
170              
171              
172              
173             # CONSTRUCTOR RELATED
174             sub init()
175             # Rekindle all that we are
176             {
177 0     0 1   my ($self, $config) = @_; # Get vars from Class::Base::new()
178 0           $self->load_args($config); # Load config into $self
179 0           $self->create_attributes(); # Set our attributes and defaults
180 0           return $self;
181             }
182              
183             sub create_attributes()
184             # Add internal attributes to $self (does not overwrite existing values)
185             # AND apply default values to externally setable parameters
186             {
187 0     0 0   my $self = shift;
188            
189             # See %CONF declaration at the top of this file
190            
191 0           foreach my $a (keys %CONF)
192             {
193 0 0         $self->{$a} = $CONF{$a} unless (exists($self->{$a}));
194             }
195            
196 0           return $self;
197             }
198              
199             sub load_args($$)
200             # Used by the constructor to load config into $self.
201             # NOTE: _ is skipped
202             {
203 0     0 0   my ($self, $args) = (shift, shift);
204            
205 0           for my $key (keys %{ $args })
  0            
206             {
207             # Skip values that could overwrite internal attributes
208 0 0         next if $key =~ /^\_/;
209            
210 0 0         (!exists($self->{$key}))
211             ? $self->{$key} = $args->{$key}
212             : ($self->debug("loadArgs: $key already exists in \$self"));
213             }
214            
215 0           return $self;
216             }
217              
218              
219             ###############################################################################
220              
221             # TODO: move into separate module (Regexp::Match::List::RE?)
222             package Regexp::Match::List::RE;
223             # A simple object to store a regular expression test and all its matter
224              
225             sub create()
226             # A constructor.
227             # See add()
228             {
229 0     0     my $class = shift;
230 0           my %att = @_;
231 0           my $self = {};
232            
233 0           bless \%att, $class;
234             }
235              
236             # Increment hit tally for this regular expression by user value or 1
237             # See match()
238 0   0 0     sub count_hit() { shift->{'hits'} += shift || 1; }
239              
240             sub callback()
241             # Run this regular expression's callback if one exists.
242             # By default we will return the result of the regexp test.
243             # IN: (array) results ($1 .. $n) of this RE on the current string
244             # OUT: (bool) success as determined by the callback
245             #
246             # See match()
247             {
248 0     0     my $self = shift;
249            
250             # If there is no callback, return the test result
251 0 0         return ($#_ >= 0) unless(exists($self->{'callback'}));
252            
253             # Send the callback the test result as well as a reference to ourself
254 0           return &{ $self->{'callback'} }($self, @_);
  0            
255             }
256              
257              
258             # $Log: List.pm,v $
259             # Revision 1.1.1.1.8.3 2004/04/29 01:45:31 dgrant
260             # - Initial preparation for CPAN
261             #
262             # Revision 1.1.1.1.8.2 2004/04/23 23:30:25 dgrant
263             # - Added callback template to Regexp/Match/List.pm
264             #
265             # Revision 1.1.1.1.8.1 2004/04/16 17:10:34 dgrant
266             # - Merging libperl-016 changes into the libperl-1-current trunk
267             #
268             # Revision 1.1.1.1.2.1.20.2 2004/04/08 18:23:56 dgrant
269             # *** empty log message ***
270             #
271             # Revision 1.1.1.1.2.1.20.1 2004/04/08 16:42:30 dgrant
272             # - No significant change
273             #
274             # Revision 1.1.1.1.2.1 2004/03/25 01:49:51 dgrant
275             # - Inital import of List.pm
276             # - Added cvs Id and Log variables
277             #
278              
279             1;
280             __END__