File Coverage

blib/lib/Regexp/Ignore.pm
Criterion Covered Total %
statement 126 135 93.3
branch 10 16 62.5
condition 7 10 70.0
subroutine 12 14 85.7
pod 10 11 90.9
total 165 186 88.7


line stmt bran cond sub pod time code
1             package Regexp::Ignore;
2              
3 2     2   49 use 5.006;
  2         6  
  2         80  
4 2     2   11 use strict;
  2         4  
  2         60  
5 2     2   14 use warnings;
  2         3  
  2         3581  
6              
7             our $VERSION = '0.03';
8              
9             #############################################################
10             # new($original_text, $delimiter_pattern)
11             #############################################################
12             # the constructor
13             sub new {
14 18     18 1 38 my $proto = shift; # get the class name
15 18   33     84 my $class = ref($proto) || $proto;
16 18         42 my $self = {};
17              
18 18         91 $self->{TEXT} = shift;
19 18         47 $self->{DELIMITER_PATTERN} = shift;
20            
21 18         42 $self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} = $self->{DELIMITER_PATTERN};
22 18         56 $self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} =
23             quotemeta($self->{DELIMITER_PATTERN_REGULAR_EXPRESSION});
24 18         95 $self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} =~
25             s/__INDEX__/\(\?\:[\\d\]\+\)/g;
26 18         160 $self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} =
27             qr/$self->{DELIMITER_PATTERN_REGULAR_EXPRESSION}/;
28 18         39 $self->{TRANSLATION_POSITION_FACTOR} = 0;
29 18         49 bless ($self, $class);
30 18         60 return $self;
31             } # of new
32              
33             #################
34             # text
35             #################
36             sub text {
37 36     36 1 2068 my $self = shift;
38 36 50       103 if (@_) { $self->{TEXT} = shift }
  0         0  
39 36         536 return $self->{TEXT};
40             } # of text
41              
42             #################
43             # delimited_text
44             #################
45             sub delimited_text {
46 18     18 1 5249 my $self = shift;
47 18         296 return $self->{DELIMITED_TEXT};
48             } # of delimited_text
49              
50             #################
51             # cleaned_text
52             #################
53             sub cleaned_text {
54 72     72 1 2741 my $self = shift;
55 72 100       189 if (@_) { $self->{CLEANED_TEXT} = shift }
  18         66  
56 72         599 return $self->{CLEANED_TEXT};
57             } # of cleaned_text
58              
59             ######################
60             # delimiter_pattern
61             ######################
62             sub delimiter_pattern {
63 0     0 1 0 my $self = shift;
64 0 0       0 if (@_) { $self->{DELIMITER_PATTERN} = shift }
  0         0  
65 0         0 return $self->{DELIMITER_PATTERN};
66             } # of delimiter_pattern
67              
68             ######################
69             # translation_position_factor
70             ######################
71             sub translation_position_factor {
72 18     18 0 10747 my $self = shift;
73 18 50       72 if (@_) { $self->{TRANSLATION_POSITION_FACTOR} = shift }
  18         37  
74 18         51 return $self->{TRANSLATION_POSITION_FACTOR};
75             } # of translation_position_factor
76              
77             ########################
78             # get_tokens
79             ########################
80             sub get_tokens {
81 0     0 1 0 my $self = shift;
82 0         0 die("This is an abstract method");
83             } # of get_tokens
84              
85             #####################
86             # split
87             #####################
88             sub split {
89 18     18 1 2398 my $self = shift;
90             # get the tokens
91 18         59 $self->{TOKENS} = [];
92 18         49 $self->{FLAGS} = [];
93 18         74 ($self->{TOKENS}, $self->{FLAGS}) = $self->get_tokens();
94              
95             # now build the delimited text which will hold the wanted tokens and
96             # for each token, a delimiter just before it, that represents the token.
97             # build also the cleaned text and the positions translation between them
98 18         84 $self->{DELIMITED_TEXT} = "";
99 18         45 $self->{CLEANED_TEXT} = "";
100 18         30 my $index = 0;
101 18         32 my $cleaned_position = 0;
102 18         47 my $delimited_position = 0;
103 18         46 my $delimiter_length = length($self->{DELIMITER_PATTERN});
104 18         27 my @cleaned_to_delimited_positions;
105 18         72 while (defined($self->{TOKENS}[$index])) {
106 13383 100       30333 if ($self->{FLAGS}[$index]) { # if the flag is 1, the text is clean
107 5573         9085 my $token = $self->{TOKENS}[$index];
108 5573         6941 my $token_length = length($token);
109              
110             # add the token to the cleaned text with the delimiter
111 5573         7581 $self->{DELIMITED_TEXT} .= $token;
112 5573         6797 $self->{CLEANED_TEXT} .= $token;
113              
114             # now keep the positions
115 5573 50       10763 if ($token_length > 0) {
116 5573         10718 for (my $j = 0; $j < $token_length; $j++) {
117 115973         240134 $cleaned_to_delimited_positions[$cleaned_position + $j] =
118             $delimited_position + $j;
119             # print "".($cleaned_position + $j)." ==> ".
120             # ($delimited_position + $j)."\n";
121             }
122 5573         5400 $cleaned_position += $token_length;
123 5573         6546 $delimited_position += $token_length;
124             }
125             }
126             else {
127             # create the delimiter and add it to the delimited text.
128 7810         11215 my $delimiter = $self->{DELIMITER_PATTERN};
129 7810         14456 my $index_string = sprintf("%09d", $index);
130 7810         18992 $delimiter =~ s/__INDEX__/$index_string/g;
131 7810         12405 $self->{DELIMITED_TEXT} .= $delimiter;
132             # add the delimiter length to the $delimited_position
133 7810         10212 $delimited_position += $delimiter_length;
134             }
135 13383         40442 $index++; # increment the index
136             }
137             # save the translation hash as a data member
138 18         140 $self->{CLEANED_TO_DELIMITED_POSITIONS} = \@cleaned_to_delimited_positions;
139             } # of split
140              
141             #######################
142             # s
143             #
144             # switches:
145             # e Evaluate the right side as an expression.
146             # second e The replacement portion is `eval'ed before being
147             # run as a Perl expression
148             # e is not yet implemented.
149             # g Replace globally, i.e., all occurrences.
150             # i Do case-insensitive pattern matching.
151             # m Treat string as multiple lines.
152             # o Compile pattern only once.
153             # s Treat string as single line.
154             # x Use extended regular expressions.
155             #######################
156             sub s {
157 72     72 1 2334 my $self = shift;
158 72         123 my $pattern = shift;
159 72   50     192 my $replacer = shift || "";
160 72   100     210 my $switches = shift || "";
161              
162             # if there is a g switch, remember it and remove it from
163             # the switches string
164 72         253 my $g_switch = ($switches =~ s/g//g);
165             # the same with e switches
166 72         147 my $e_switch = ($switches =~ s/e//g);
167              
168             # calculate the compiled pattern - include the switches
169 72         7029 my $compiled_pattern = eval("qr/$pattern/$switches");
170              
171             # we build the resulted cleaned text (after replacing) in the buffer
172 72         260 my $buffer = "";
173 72         103 my $last_position = 0; # holds the position just after each match
174              
175             # the translation factor fix the translation table, when we have
176             # a replacer that is different in its size from the match.
177 72         124 $self->{TRANSLATION_POSITION_FACTOR} = 0;
178              
179             # a flag that we use to implement the g switch.
180 72         89 my $no_g_counter = 1;
181              
182             # counter will count the number of substitutions
183 72         90 my $counter = 0;
184              
185             # the main loop where we match and replace
186 72   100     1092 while ($no_g_counter && $self->{CLEANED_TEXT} =~ /$compiled_pattern/gc) {
187 204         325 $no_g_counter = $g_switch;
188             # keep the matching varibales. note that we do not keep the variables
189             # $` and $'.
190 204         3413 my $variables = { 1 => $1,
191             2 => $2,
192             3 => $3,
193             4 => $4,
194             5 => $5,
195             6 => $6,
196             7 => $7,
197             8 => $8,
198             9 => $9,
199             '&' => $& };
200 204         380 my $match = $&; # the match itself
201             # get the position of the end of the match, the start of the match,
202             # and the match length
203 204         453 my $end_match_position = pos($self->{CLEANED_TEXT}) - 1;
204 204         339 my $match_length = length($match);
205 204         317 my $start_match_position = $end_match_position - $match_length + 1;
206             # calculate the replacer and its length
207 204         358 my $this_replacer = $replacer;
208 204         1692 $this_replacer =~ s/\$([0-9])/$variables->{$1}/g;
209            
210             # here I tried to create the e_switch. there are two problems with it:
211             # 1. lexical variables from the code that calls this method are not
212             # available for this method.
213             # 2. I didn't fully understand the way it is done with the real s//
214             # operator - when it is evaled, how to eval the expression without the
215             # variable etc.
216             # so meanwhile, use replace.
217             # if ($e_switch) {
218             # my ($package, $filename, $line) = caller;
219             # if ($e_switch >= 1) {
220             # my $put_quotes = sub {
221             # my $one = shift || "";
222             # my $two = shift || "";
223             # my $three = shift || "";
224             # if ($one eq '"' && $three eq '"') {
225             # return "\'\$$two\'";
226             # }
227             # else {
228             # return "$one\'\$$two\'$three";
229             # }
230             # };
231             # $this_replacer =~
232             # s/(\"?)\$(\w+)(\"?)/&$put_quotes($1, $2, $3)/ge;
233             # $this_replacer =~ s/\&(\w+)/\&$package\:\:$1/g;
234             # print "eval($this_replacer)\n";
235             # $this_replacer = eval($this_replacer);
236             # }
237             # if ($e_switch == 2) {
238             # $this_replacer =~ s/\$(\w+)/\$$package\:\:$1/g;
239             # print "eval($this_replacer)\n";
240             # $this_replacer = eval($this_replacer);
241             # }
242             # }
243              
244 204         428 my $replacer_length = length($this_replacer);
245            
246             # print $self->{CLEANED_TEXT}."\n";
247 204         765 $self->replace(\$buffer,
248             \$last_position,
249             $start_match_position,
250             $end_match_position,
251             $this_replacer);
252 204         10461 $counter++;
253             }
254             # add the rest of the text in the cleaned text
255 72         529 $buffer .= substr($self->{CLEANED_TEXT}, $last_position);
256 72         505 $self->{CLEANED_TEXT} = $buffer;
257 72         494 return $counter;
258             } # of s
259              
260             #####################
261             # replace
262             #####################
263             sub replace {
264 220     220 1 786 my $self = shift;
265 220         361 my $buffer_ref = shift;
266 220         289 my $last_position_ref = shift;
267 220         291 my $start_match_position = shift;
268 220         311 my $end_match_position = shift;
269 220         316 my $this_replacer = shift;
270              
271 220         356 my $match_length = $end_match_position - $start_match_position + 1;
272 220         312 my $replacer_length = length($this_replacer);
273              
274             # print "replace: \$start_match_position=$start_match_position\n";
275             # print "replace: \$end_match_position=$end_match_position\n";
276             # print "replace: \$match_length=$match_length\n";
277             # print "replace: \$replacer_length=$replacer_length\n";
278             # print "replace: \$this_replacer=$this_replacer==\n";
279             # print "replace: TRANSLATION_POSITION_FACTOR=".
280             # $self->{TRANSLATION_POSITION_FACTOR}."\n";
281            
282 220         428 my $translation_array =
283             $self->{CLEANED_TO_DELIMITED_POSITIONS};
284            
285             # build the buffer of the cleaned text
286 220         1273 $$buffer_ref .=
287             substr($self->{CLEANED_TEXT},
288             $$last_position_ref,
289             $start_match_position - $$last_position_ref).$this_replacer;
290            
291             # get the start and end positions of the match in the delimited text
292 220         496 my $delimited_start_match_position =
293             $translation_array->[$start_match_position +
294             $self->{TRANSLATION_POSITION_FACTOR}];
295 220         407 my $delimited_end_match_position =
296             $translation_array->[$end_match_position +
297             $self->{TRANSLATION_POSITION_FACTOR}];
298 220         378 my $delimited_match_length = $delimited_end_match_position -
299             $delimited_start_match_position + 1;
300            
301             # get that delimited text in the matched position
302 220         551 my $delimited_match = substr($self->{DELIMITED_TEXT},
303             $delimited_start_match_position,
304             $delimited_match_length);
305             # print "delimited_match=$delimited_match\n";
306 220         388 my $re = $self->{DELIMITER_PATTERN_REGULAR_EXPRESSION};
307 220         2112 my @delimiters = ($delimited_match =~ /$re/g);
308 220         677 my $delimited_replacer = $this_replacer.join("",@delimiters);
309 220         4536 $self->{DELIMITED_TEXT} =
310             substr($self->{DELIMITED_TEXT},
311             0, $delimited_start_match_position).
312             $delimited_replacer.
313             substr($self->{DELIMITED_TEXT},
314             $delimited_end_match_position + 1);
315            
316             # calculate the translation position factor
317 220         317 my $new_translation_position_factor = $replacer_length - $match_length;
318              
319             # fix the translation array
320             # first of all we should push (or pull) all the indexes after the
321             # replacer.
322 220         293 my $translation_array_size = scalar(@$translation_array);
323            
324 220 50       726 if ($new_translation_position_factor < 0) {
    100          
325             # if the factor is negative we should copy the cells in the array
326             # from the start to the end (so we do not run over the cells we
327             # want to copy from)
328 0         0 for (my $i =
329             $end_match_position + $self->{TRANSLATION_POSITION_FACTOR} + 1;
330             $i < $translation_array_size; $i++) {
331 0         0 $translation_array->[$i + $new_translation_position_factor] =
332             $translation_array->[$i] + $new_translation_position_factor;
333             }
334             }
335             elsif ($new_translation_position_factor > 0) {
336             # if the factor is positive we should copy the cells in the array
337             # from the end to the start (so we do not run over the cells we
338             # want to copy from)
339 184         518 for (my $i = $translation_array_size - 1;
340             $i > $end_match_position + $self->{TRANSLATION_POSITION_FACTOR};
341             $i--) {
342 807732         1789357 $translation_array->[$i + $new_translation_position_factor] =
343             $translation_array->[$i] + $new_translation_position_factor;
344             }
345             }
346              
347             # now we should create the translation for the new replacer. we know,
348             # though, that the replacer is put in the start of the region in the
349             # delimited_text (and after it we put all the delimiters). so the
350             # translation is simple:
351 220         706 for (my $i = 0; $i < $replacer_length; $i++) {
352 5480         12224 $translation_array->[$start_match_position +
353             $self->{TRANSLATION_POSITION_FACTOR} + $i] =
354             $delimited_start_match_position + $i;
355             }
356            
357             # fix the translation position factor for the next replace calls
358 220         439 $self->{TRANSLATION_POSITION_FACTOR} += $new_translation_position_factor;
359              
360             # set the last_position
361 220         1595 $$last_position_ref = $end_match_position + 1;
362             } # of replace
363              
364             #####################
365             # merge
366             #####################
367             sub merge {
368 18     18 1 98 my $self = shift;
369            
370 18         186 my $delimited_text = $self->{DELIMITED_TEXT};
371              
372             # $re will hold the regular expression to match a
373             # delimiter. yet, it will have around the index a paranthesis,
374             # so the index will go to $1 when there is a match.
375 18         70 my $re = quotemeta($self->{DELIMITER_PATTERN});
376 18         98 $re =~ s/__INDEX__/\(\[\\d\]\+\)/g;
377 18         213 $re = qr/$re/;
378             # the buffer will hold the resulted text
379 18         139 my $buffer = $delimited_text;
380             # instead of the pattern, put back the unwanted tokens
381 18         14113 $buffer =~ s/$re/$self->{TOKENS}[$1]/g;
382              
383             # return all the tokens as the text
384 18         467 $self->{TEXT} = $buffer;
385 18         129 return $self->{TEXT};
386             } # of merge
387              
388             1;
389             __END__