File Coverage

blib/lib/String/Tokenizer.pm

Criterion	Covered	Total	%
statement	117	117	100.0
branch	44	44	100.0
condition	9	14	64.2
subroutine	24	24	100.0
pod	6	6	100.0
total	200	205	97.5

line	stmt	bran	cond	sub	pod	time	code
1
2							package String::Tokenizer;
3
4	2			2		46277	use strict;
	2					5
	2					79
5	2			2		10	use warnings;
	2					4
	2					191
6
7							our $VERSION = '0.05';
8
9	2			2		13	use constant RETAIN_WHITESPACE => 1;
	2					18
	2					176
10	2			2		11	use constant IGNORE_WHITESPACE => 0;
	2					4
	2					1246
11
12							### constructor
13
14							sub new {
15	10			10	1	7921	my ($_class, @args) = @_;
16	10		33			61	my $class = ref($_class) \|\| $_class;
17	10					46	my $string_tokenizer = {
18							tokens => [],
19							delimiter => undef,
20							handle_whitespace => IGNORE_WHITESPACE
21							};
22	10					29	bless($string_tokenizer, $class);
23	10	100				43	$string_tokenizer->tokenize(@args) if @args;
24	10					31	return $string_tokenizer;
25							}
26
27							### methods
28
29							sub setDelimiter {
30	8			8	1	16	my ($self, $delimiter) = @_;
31	8					31	my $delimiter_reg_exp = join "\\|" => map { s/(\W)/\\$1/g; $_ } split // => $delimiter;
	26					149
	26					73
32	8					202	$self->{delimiter} = qr/$delimiter_reg_exp/;
33							}
34
35							sub handleWhitespace {
36	3			3	1	7	my ($self, $value) = @_;
37	3					9	$self->{handle_whitespace} = $value;
38							}
39
40							sub tokenize {
41	10			10	1	932	my ($self, $string, $delimiter, $handle_whitespace) = @_;
42							# if we have a delimiter passed in then use it
43	10	100				49	$self->setDelimiter($delimiter) if defined $delimiter;
44							# if we are asking about whitespace then handle it
45	10	100				41	$self->handleWhitespace($handle_whitespace) if defined $handle_whitespace;
46							# if the two above are not handled, then the object will use
47							# the values set already.
48							# split everything by whitespace no matter what
49							# (possible multiple occurances of white space too)
50	10					13	my @tokens;
51	10	100				34	if ($self->{handle_whitespace}) {
52	2					35	@tokens = split /(\s+)/ => $string;
53							}
54							else {
55	8					55	@tokens = split /\s+/ => $string;
56							}
57	10	100				38	if ($self->{delimiter}) {
58							# create the delimiter reg-ex
59							# escape all non-alpha-numeric
60							# characters, just to be safe
61	9					18	my $delimiter = $self->{delimiter};
62							# loop through the tokens
63							@tokens = map {
64							# if the token contains a delimiter then ...
65	9	100				15	if (/$delimiter/) {
	148					592
66	60					64	my ($token, @_tokens);
67							# split the token up into characters
68							# and the loop through all the characters
69	60					146	foreach my $char (split //) {
70							# if the character is a delimiter
71	196	100				839	if ($char =~ /^$delimiter$/) {
72							# and we already have a token in the works
73	75	100	66			289	if (defined($token) && $token =~ /^.*$/) {
74							# add the token to the
75							# temp tokens list
76	31					47	push @_tokens => $token;
77							}
78							# and then push our delimiter character
79							# onto the temp tokens list
80	75					96	push @_tokens => $char;
81							# now we need to undefine our token
82	75					163	$token = undef;
83							}
84							# if the character is not a delimiter then
85							else {
86							# check to make sure the token is defined
87	121	100				236	$token = "" unless defined $token;
88							# and then add the chracter to it
89	121					222	$token .= $char;
90							}
91							}
92							# now push any remaining token onto
93							# the temp tokens list
94	60	100				158	push @_tokens => $token if defined $token;
95							# and return tokens
96	60					193	@_tokens;
97							}
98							# if our token does not have
99							# the delimiter in it
100							else {
101							# just return it
102	88					201	$_
103							}
104							} @tokens;
105							}
106	10					53	$self->{tokens} = \@tokens;
107							}
108
109							sub getTokens {
110	6			6	1	1544	my ($self) = @_;
111							return wantarray ?
112	6	100				34	@{$self->{tokens}}
	1					91
113							:
114							$self->{tokens};
115							}
116
117							sub iterator {
118	4			4	1	2440	my ($self) = @_;
119							# returns a copy of the array
120	4					19	return String::Tokenizer::Iterator->new($self->{tokens});
121							}
122
123							package String::Tokenizer::Iterator;
124
125	2			2		16	use strict;
	2					3
	2					71
126	2			2		9	use warnings;
	2					4
	2					1731
127
128							sub new {
129	5	100		5		56	((caller())[0] eq "String::Tokenizer")
130							\|\| die "Insufficient Access Priviledges : Only String::Tokenizer can create String::Tokenizer::Iterator instances";
131	4					36	my ($_class, $tokens) = @_;
132	4		33			55	my $class = ref($_class) \|\| $_class;
133	4					14	my $iterator = {
134							tokens => $tokens,
135							index => 0
136							};
137	4					13	bless($iterator, $class);
138	4					13	return $iterator;
139							}
140
141							sub reset {
142	1			1		2	my ($self) = @_;
143	1					4	$self->{index} = 0;
144							}
145
146							sub hasNextToken {
147	108			108		5207	my ($self) = @_;
148	108	100				199	return ($self->{index} < scalar @{$self->{tokens}}) ? 1 : 0;
	108					359
149							}
150
151							sub hasPrevToken {
152	26			26		35	my ($self) = @_;
153	26					58	return ($self->{index} > 0);
154							}
155
156							sub nextToken {
157	118			118		1566	my ($self) = @_;
158	118	100				166	return undef if ($self->{index} >= scalar @{$self->{tokens}});
	118					296
159	117					469	return $self->{tokens}->[$self->{index}++];
160							}
161
162							sub prevToken {
163	26			26		93	my ($self) = @_;
164	26	100				59	return undef if ($self->{index} <= 0);
165	25					103	return $self->{tokens}->[--$self->{index}];
166							}
167
168							sub currentToken {
169	25			25		74	my ($self) = @_;
170	25					78	return $self->{tokens}->[$self->{index} - 1];
171							}
172
173							sub lookAheadToken {
174	41			41		26505	my ($self) = @_;
175	40					175	return undef if ( $self->{index} <= 0
176	41	100	100			163	\|\| $self->{index} >= scalar @{$self->{tokens}});
177	38					196	return $self->{tokens}->[$self->{index}];
178							}
179
180							sub collectTokensUntil {
181	5			5		12	my ($self, $token_to_match) = @_;
182							# if this matches our current token ...
183							# then we just return nothing as there
184							# is nothing to accumulate
185	5	100				12	if ($self->lookAheadToken() eq $token_to_match) {
186							# then just advance it one
187	1					4	$self->nextToken();
188							# and return nothing
189	1					5	return;
190							}
191
192							# if it doesnt match our current token then, ...
193	4					4	my @collection;
194							# store the index we start at
195	4					8	my $old_index = $self->{index};
196	4					4	my $matched;
197							# loop through the tokens
198	4					9	while ($self->hasNextToken()) {
199	23					42	my $token = $self->nextToken();
200	23	100				48	if ($token ne $token_to_match) {
201	20					52	push @collection => $token;
202							}
203							else {
204	3					4	$matched++;
205	3					6	last;
206							}
207							}
208	4	100				10	unless ($matched) {
209							# reset back to where we started, and ...
210	1					3	$self->{index} = $old_index;
211							# and return nothing
212	1					7	return;
213							}
214							# and return our collection
215	3					36	return @collection;
216							}
217
218
219							sub skipTokensUntil {
220	3			3		7	my ($self, $token_to_match) = @_;
221							# if this matches our current token ...
222	3	100				9	if ($self->lookAheadToken() eq $token_to_match) {
223							# then just advance it one
224	1					4	$self->nextToken();
225							# and return success
226	1					6	return 1;
227							}
228							# if it doesnt match our current token then, ...
229							# store the index we start at
230	2					3	my $old_index = $self->{index};
231							# and loop through the tokens
232	2					7	while ($self->hasNextToken()) {
233							# return success if we match our token
234	18	100				31	return 1 if ($self->nextToken() eq $token_to_match);
235							}
236							# otherwise we didnt match, and should
237							# reset back to where we started, and ...
238	1					3	$self->{index} = $old_index;
239							# return failure
240	1					6	return 0;
241							}
242
243							sub skipTokenIfWhitespace {
244	4			4		6	my ($self) = @_;
245	4	100				10	$self->{index}++ if $self->lookAheadToken() =~ /^\s+$/;
246							}
247
248							sub skipTokens {
249	13			13		41	my ($self, $num_token_to_skip) = @_;
250	13		100			52	$num_token_to_skip \|\|= 1;
251	13					35	$self->{index} += $num_token_to_skip;
252							}
253
254							*skipToken = \&skipTokens;
255
256							1;
257
258							__END__