File Coverage

blib/lib/Regexp/Ignore.pm

Criterion	Covered	Total	%
statement	126	135	93.3
branch	10	16	62.5
condition	7	10	70.0
subroutine	12	14	85.7
pod	10	11	90.9
total	165	186	88.7

line	stmt	bran	cond	sub	pod	time	code
1							package Regexp::Ignore;
2
3	2			2		49	use 5.006;
	2					6
	2					80
4	2			2		11	use strict;
	2					4
	2					60
5	2			2		14	use warnings;
	2					3
	2					3581
6
7							our $VERSION = '0.03';
8
9							#############################################################
10							# new($original_text, $delimiter_pattern)
11							#############################################################
12							# the constructor
13							sub new {
14	18			18	1	38	my $proto = shift; # get the class name
15	18		33			84	my $class = ref($proto) \|\| $proto;
16	18					42	my $self = {};
17
18	18					91	$self->{TEXT} = shift;
19	18					47	$self->{DELIMITER_PATTERN} = shift;
20
21	18					42	$self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} = $self->{DELIMITER_PATTERN};
22	18					56	$self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} =
23							quotemeta($self->{DELIMITER_PATTERN_REGULAR_EXPRESSION});
24	18					95	$self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} =~
25							s/__INDEX__/$\?\:[\\d\]\+$/g;
26	18					160	$self->{DELIMITER_PATTERN_REGULAR_EXPRESSION} =
27							qr/$self->{DELIMITER_PATTERN_REGULAR_EXPRESSION}/;
28	18					39	$self->{TRANSLATION_POSITION_FACTOR} = 0;
29	18					49	bless ($self, $class);
30	18					60	return $self;
31							} # of new
32
33							#################
34							# text
35							#################
36							sub text {
37	36			36	1	2068	my $self = shift;
38	36	50				103	if (@_) { $self->{TEXT} = shift }
	0					0
39	36					536	return $self->{TEXT};
40							} # of text
41
42							#################
43							# delimited_text
44							#################
45							sub delimited_text {
46	18			18	1	5249	my $self = shift;
47	18					296	return $self->{DELIMITED_TEXT};
48							} # of delimited_text
49
50							#################
51							# cleaned_text
52							#################
53							sub cleaned_text {
54	72			72	1	2741	my $self = shift;
55	72	100				189	if (@_) { $self->{CLEANED_TEXT} = shift }
	18					66
56	72					599	return $self->{CLEANED_TEXT};
57							} # of cleaned_text
58
59							######################
60							# delimiter_pattern
61							######################
62							sub delimiter_pattern {
63	0			0	1	0	my $self = shift;
64	0	0				0	if (@_) { $self->{DELIMITER_PATTERN} = shift }
	0					0
65	0					0	return $self->{DELIMITER_PATTERN};
66							} # of delimiter_pattern
67
68							######################
69							# translation_position_factor
70							######################
71							sub translation_position_factor {
72	18			18	0	10747	my $self = shift;
73	18	50				72	if (@_) { $self->{TRANSLATION_POSITION_FACTOR} = shift }
	18					37
74	18					51	return $self->{TRANSLATION_POSITION_FACTOR};
75							} # of translation_position_factor
76
77							########################
78							# get_tokens
79							########################
80							sub get_tokens {
81	0			0	1	0	my $self = shift;
82	0					0	die("This is an abstract method");
83							} # of get_tokens
84
85							#####################
86							# split
87							#####################
88							sub split {
89	18			18	1	2398	my $self = shift;
90							# get the tokens
91	18					59	$self->{TOKENS} = [];
92	18					49	$self->{FLAGS} = [];
93	18					74	($self->{TOKENS}, $self->{FLAGS}) = $self->get_tokens();
94
95							# now build the delimited text which will hold the wanted tokens and
96							# for each token, a delimiter just before it, that represents the token.
97							# build also the cleaned text and the positions translation between them
98	18					84	$self->{DELIMITED_TEXT} = "";
99	18					45	$self->{CLEANED_TEXT} = "";
100	18					30	my $index = 0;
101	18					32	my $cleaned_position = 0;
102	18					47	my $delimited_position = 0;
103	18					46	my $delimiter_length = length($self->{DELIMITER_PATTERN});
104	18					27	my @cleaned_to_delimited_positions;
105	18					72	while (defined($self->{TOKENS}[$index])) {
106	13383	100				30333	if ($self->{FLAGS}[$index]) { # if the flag is 1, the text is clean
107	5573					9085	my $token = $self->{TOKENS}[$index];
108	5573					6941	my $token_length = length($token);
109
110							# add the token to the cleaned text with the delimiter
111	5573					7581	$self->{DELIMITED_TEXT} .= $token;
112	5573					6797	$self->{CLEANED_TEXT} .= $token;
113
114							# now keep the positions
115	5573	50				10763	if ($token_length > 0) {
116	5573					10718	for (my $j = 0; $j < $token_length; $j++) {
117	115973					240134	$cleaned_to_delimited_positions[$cleaned_position + $j] =
118							$delimited_position + $j;
119							# print "".($cleaned_position + $j)." ==> ".
120							# ($delimited_position + $j)."\n";
121							}
122	5573					5400	$cleaned_position += $token_length;
123	5573					6546	$delimited_position += $token_length;
124							}
125							}
126							else {
127							# create the delimiter and add it to the delimited text.
128	7810					11215	my $delimiter = $self->{DELIMITER_PATTERN};
129	7810					14456	my $index_string = sprintf("%09d", $index);
130	7810					18992	$delimiter =~ s/__INDEX__/$index_string/g;
131	7810					12405	$self->{DELIMITED_TEXT} .= $delimiter;
132							# add the delimiter length to the $delimited_position
133	7810					10212	$delimited_position += $delimiter_length;
134							}
135	13383					40442	$index++; # increment the index
136							}
137							# save the translation hash as a data member
138	18					140	$self->{CLEANED_TO_DELIMITED_POSITIONS} = \@cleaned_to_delimited_positions;
139							} # of split
140
141							#######################
142							# s
143							#
144							# switches:
145							# e Evaluate the right side as an expression.
146							# second e The replacement portion is `eval'ed before being
147							# run as a Perl expression
148							# e is not yet implemented.
149							# g Replace globally, i.e., all occurrences.
150							# i Do case-insensitive pattern matching.
151							# m Treat string as multiple lines.
152							# o Compile pattern only once.
153							# s Treat string as single line.
154							# x Use extended regular expressions.
155							#######################
156							sub s {
157	72			72	1	2334	my $self = shift;
158	72					123	my $pattern = shift;
159	72		50			192	my $replacer = shift \|\| "";
160	72		100			210	my $switches = shift \|\| "";
161
162							# if there is a g switch, remember it and remove it from
163							# the switches string
164	72					253	my $g_switch = ($switches =~ s/g//g);
165							# the same with e switches
166	72					147	my $e_switch = ($switches =~ s/e//g);
167
168							# calculate the compiled pattern - include the switches
169	72					7029	my $compiled_pattern = eval("qr/$pattern/$switches");
170
171							# we build the resulted cleaned text (after replacing) in the buffer
172	72					260	my $buffer = "";
173	72					103	my $last_position = 0; # holds the position just after each match
174
175							# the translation factor fix the translation table, when we have
176							# a replacer that is different in its size from the match.
177	72					124	$self->{TRANSLATION_POSITION_FACTOR} = 0;
178
179							# a flag that we use to implement the g switch.
180	72					89	my $no_g_counter = 1;
181
182							# counter will count the number of substitutions
183	72					90	my $counter = 0;
184
185							# the main loop where we match and replace
186	72		100			1092	while ($no_g_counter && $self->{CLEANED_TEXT} =~ /$compiled_pattern/gc) {
187	204					325	$no_g_counter = $g_switch;
188							# keep the matching varibales. note that we do not keep the variables
189							# $` and $'.
190	204					3413	my $variables = { 1 => $1,
191							2 => $2,
192							3 => $3,
193							4 => $4,
194							5 => $5,
195							6 => $6,
196							7 => $7,
197							8 => $8,
198							9 => $9,
199							'&' => $& };
200	204					380	my $match = $&; # the match itself
201							# get the position of the end of the match, the start of the match,
202							# and the match length
203	204					453	my $end_match_position = pos($self->{CLEANED_TEXT}) - 1;
204	204					339	my $match_length = length($match);
205	204					317	my $start_match_position = $end_match_position - $match_length + 1;
206							# calculate the replacer and its length
207	204					358	my $this_replacer = $replacer;
208	204					1692	$this_replacer =~ s/\$([0-9])/$variables->{$1}/g;
209
210							# here I tried to create the e_switch. there are two problems with it:
211							# 1. lexical variables from the code that calls this method are not
212							# available for this method.
213							# 2. I didn't fully understand the way it is done with the real s//
214							# operator - when it is evaled, how to eval the expression without the
215							# variable etc.
216							# so meanwhile, use replace.
217							# if ($e_switch) {
218							# my ($package, $filename, $line) = caller;
219							# if ($e_switch >= 1) {
220							# my $put_quotes = sub {
221							# my $one = shift \|\| "";
222							# my $two = shift \|\| "";
223							# my $three = shift \|\| "";
224							# if ($one eq '"' && $three eq '"') {
225							# return "\'\$$two\'";
226							# }
227							# else {
228							# return "$one\'\$$two\'$three";
229							# }
230							# };
231							# $this_replacer =~
232							# s/(\"?)\$(\w+)(\"?)/&$put_quotes($1, $2, $3)/ge;
233							# $this_replacer =~ s/\&(\w+)/\&$package\:\:$1/g;
234							# print "eval($this_replacer)\n";
235							# $this_replacer = eval($this_replacer);
236							# }
237							# if ($e_switch == 2) {
238							# $this_replacer =~ s/\$(\w+)/\$$package\:\:$1/g;
239							# print "eval($this_replacer)\n";
240							# $this_replacer = eval($this_replacer);
241							# }
242							# }
243
244	204					428	my $replacer_length = length($this_replacer);
245
246							# print $self->{CLEANED_TEXT}."\n";
247	204					765	$self->replace(\$buffer,
248							\$last_position,
249							$start_match_position,
250							$end_match_position,
251							$this_replacer);
252	204					10461	$counter++;
253							}
254							# add the rest of the text in the cleaned text
255	72					529	$buffer .= substr($self->{CLEANED_TEXT}, $last_position);
256	72					505	$self->{CLEANED_TEXT} = $buffer;
257	72					494	return $counter;
258							} # of s
259
260							#####################
261							# replace
262							#####################
263							sub replace {
264	220			220	1	786	my $self = shift;
265	220					361	my $buffer_ref = shift;
266	220					289	my $last_position_ref = shift;
267	220					291	my $start_match_position = shift;
268	220					311	my $end_match_position = shift;
269	220					316	my $this_replacer = shift;
270
271	220					356	my $match_length = $end_match_position - $start_match_position + 1;
272	220					312	my $replacer_length = length($this_replacer);
273
274							# print "replace: \$start_match_position=$start_match_position\n";
275							# print "replace: \$end_match_position=$end_match_position\n";
276							# print "replace: \$match_length=$match_length\n";
277							# print "replace: \$replacer_length=$replacer_length\n";
278							# print "replace: \$this_replacer=$this_replacer==\n";
279							# print "replace: TRANSLATION_POSITION_FACTOR=".
280							# $self->{TRANSLATION_POSITION_FACTOR}."\n";
281
282	220					428	my $translation_array =
283							$self->{CLEANED_TO_DELIMITED_POSITIONS};
284
285							# build the buffer of the cleaned text
286	220					1273	$$buffer_ref .=
287							substr($self->{CLEANED_TEXT},
288							$$last_position_ref,
289							$start_match_position - $$last_position_ref).$this_replacer;
290
291							# get the start and end positions of the match in the delimited text
292	220					496	my $delimited_start_match_position =
293							$translation_array->[$start_match_position +
294							$self->{TRANSLATION_POSITION_FACTOR}];
295	220					407	my $delimited_end_match_position =
296							$translation_array->[$end_match_position +
297							$self->{TRANSLATION_POSITION_FACTOR}];
298	220					378	my $delimited_match_length = $delimited_end_match_position -
299							$delimited_start_match_position + 1;
300
301							# get that delimited text in the matched position
302	220					551	my $delimited_match = substr($self->{DELIMITED_TEXT},
303							$delimited_start_match_position,
304							$delimited_match_length);
305							# print "delimited_match=$delimited_match\n";
306	220					388	my $re = $self->{DELIMITER_PATTERN_REGULAR_EXPRESSION};
307	220					2112	my @delimiters = ($delimited_match =~ /$re/g);
308	220					677	my $delimited_replacer = $this_replacer.join("",@delimiters);
309	220					4536	$self->{DELIMITED_TEXT} =
310							substr($self->{DELIMITED_TEXT},
311							0, $delimited_start_match_position).
312							$delimited_replacer.
313							substr($self->{DELIMITED_TEXT},
314							$delimited_end_match_position + 1);
315
316							# calculate the translation position factor
317	220					317	my $new_translation_position_factor = $replacer_length - $match_length;
318
319							# fix the translation array
320							# first of all we should push (or pull) all the indexes after the
321							# replacer.
322	220					293	my $translation_array_size = scalar(@$translation_array);
323
324	220	50				726	if ($new_translation_position_factor < 0) {
		100
325							# if the factor is negative we should copy the cells in the array
326							# from the start to the end (so we do not run over the cells we
327							# want to copy from)
328	0					0	for (my $i =
329							$end_match_position + $self->{TRANSLATION_POSITION_FACTOR} + 1;
330							$i < $translation_array_size; $i++) {
331	0					0	$translation_array->[$i + $new_translation_position_factor] =
332							$translation_array->[$i] + $new_translation_position_factor;
333							}
334							}
335							elsif ($new_translation_position_factor > 0) {
336							# if the factor is positive we should copy the cells in the array
337							# from the end to the start (so we do not run over the cells we
338							# want to copy from)
339	184					518	for (my $i = $translation_array_size - 1;
340							$i > $end_match_position + $self->{TRANSLATION_POSITION_FACTOR};
341							$i--) {
342	807732					1789357	$translation_array->[$i + $new_translation_position_factor] =
343							$translation_array->[$i] + $new_translation_position_factor;
344							}
345							}
346
347							# now we should create the translation for the new replacer. we know,
348							# though, that the replacer is put in the start of the region in the
349							# delimited_text (and after it we put all the delimiters). so the
350							# translation is simple:
351	220					706	for (my $i = 0; $i < $replacer_length; $i++) {
352	5480					12224	$translation_array->[$start_match_position +
353							$self->{TRANSLATION_POSITION_FACTOR} + $i] =
354							$delimited_start_match_position + $i;
355							}
356
357							# fix the translation position factor for the next replace calls
358	220					439	$self->{TRANSLATION_POSITION_FACTOR} += $new_translation_position_factor;
359
360							# set the last_position
361	220					1595	$$last_position_ref = $end_match_position + 1;
362							} # of replace
363
364							#####################
365							# merge
366							#####################
367							sub merge {
368	18			18	1	98	my $self = shift;
369
370	18					186	my $delimited_text = $self->{DELIMITED_TEXT};
371
372							# $re will hold the regular expression to match a
373							# delimiter. yet, it will have around the index a paranthesis,
374							# so the index will go to $1 when there is a match.
375	18					70	my $re = quotemeta($self->{DELIMITER_PATTERN});
376	18					98	$re =~ s/__INDEX__/$\[\\d\]\+$/g;
377	18					213	$re = qr/$re/;
378							# the buffer will hold the resulted text
379	18					139	my $buffer = $delimited_text;
380							# instead of the pattern, put back the unwanted tokens
381	18					14113	$buffer =~ s/$re/$self->{TOKENS}[$1]/g;
382
383							# return all the tokens as the text
384	18					467	$self->{TEXT} = $buffer;
385	18					129	return $self->{TEXT};
386							} # of merge
387
388							1;
389							__END__