File Coverage

blib/lib/DateTime/Format/Natural/Extract.pm

Criterion	Covered	Total	%
statement	160	160	100.0
branch	54	56	96.4
condition	48	51	94.1
subroutine	13	13	100.0
pod			n/a
total	275	280	98.2

line	stmt	bran	cond	sub	time	code
1						package DateTime::Format::Natural::Extract;
2
3	26			26	241	use strict;
	26				102
	26				798
4	26			26	147	use warnings;
	26				76
	26				800
5	26				14199	use base qw(
6						DateTime::Format::Natural::Duration::Checks
7						DateTime::Format::Natural::Formatted
8	26			26	139	);
	26				72
9	26			26	212	use boolean qw(true false);
	26				71
	26				129
10
11	26			26	1662	use constant DATE_TYPE => 0x01;
	26				104
	26				1467
12	26			26	180	use constant GRAMMAR_TYPE => 0x02;
	26				62
	26				1315
13	26			26	163	use constant DURATION_TYPE => 0x04;
	26				80
	26				1315
14
15	26			26	196	use DateTime::Format::Natural::Utils qw(trim);
	26				98
	26				60421
16
17						our $VERSION = '0.13';
18
19						my %grammar_durations = map { $_ => true } qw(for_count_unit);
20
21						my $get_range = sub
22						{
23						my ($aref, $index) = @_;
24						return [ grep defined, @$aref[$index, $index + 1] ];
25						};
26
27						my $extract_duration = sub
28						{
29						my ($skip, $indexes, $index) = @_;
30
31						return false unless defined $indexes->[$index] && defined $indexes->[$index + 1];
32						my ($left_index, $right_index) = ($indexes->[$index][1], $indexes->[$index + 1][0]);
33
34						return ($skip->{$left_index} \|\| $skip->{$right_index}) ? false : true;
35						};
36
37						sub _extract_expressions
38						{
39	209			209	507	my $self = shift;
40	209				502	my ($extract_string) = @_;
41
42	209				643	$extract_string =~ s/^[,;.]//;
43	209				564	$extract_string =~ s/[,;.]$//;
44
45	209				1132	while ($extract_string =~ /([,;.])/g) {
46	18				68	my $mark = $1;
47	18				187	my %patterns = (
48						',' => qr/(?!\d{4})/,
49						';' => qr/(?=\w)/,
50						'.' => qr/(?=\w)/,
51						);
52	18				58	my $pattern = $patterns{$mark};
53	18				323	$extract_string =~ s/\Q$mark\E \s+? $pattern/ [token] /x; # pretend punctuation marks are tokens
54						}
55
56	209				1526	my $timespan_sep = $self->{data}->__timespan('literal');
57
58	209				1850	1 while $extract_string =~ s/^$timespan_sep\s+//i;
59	209				1484	1 while $extract_string =~ s/\s+$timespan_sep$//i;
60
61	209				1089	$self->_rewrite(\$extract_string);
62
63	209				2171	my @tokens = split /\s+/, $extract_string;
64	209				440	my %entries = %{$self->{data}->__grammar('')};
	209				1255
65
66	209				992	my (@expressions, %skip);
67
68	209	100			1643	if ($extract_string =~ /\s+ $timespan_sep \s+/ix) {
69	97				578	$self->_extract_duration($extract_string, \@tokens, \@expressions, \%skip);
70						}
71
72	209				2358	my (%expand, %lengths);
73	209				2055	foreach my $keyword (keys %entries) {
74	14003				27033	$expand{$keyword} = $self->_expand_for($keyword);
75	14003				41982	$lengths{$keyword} = @{$entries{$keyword}->[0]};
	14003				28692
76						}
77
78	209				910	my $seen_expression;
79	209				395	do {
80	439				2889	$seen_expression = false;
81	439				1419	my $date_index;
82	439				1940	for (my $i = 0; $i < @tokens; $i++) {
83	1844	100			17125	next if $skip{$i};
84	1085	100			2936	if ($self->_check_for_date($tokens[$i], $i, \$date_index)) {
85	16				221	last;
86						}
87						}
88						GRAMMAR:
89	439				8938	foreach my $keyword (sort { $lengths{$b} <=> $lengths{$a} } grep { $lengths{$_} <= @tokens } keys %entries) {
	98208				142329
	29413				54670
90	19362				397736	my @grammar = @{$entries{$keyword}};
	19362				94293
91	19362				34138	my $types_entry = shift @grammar;
92	19362				54165	my @grammars = [ [ @grammar ], false ];
93	19362	100	100		83751	if ($expand{$keyword} && @$types_entry + 1 <= @tokens) {
94	4448				50437	@grammar = $self->_expand($keyword, $types_entry, \@grammar);
95	4448				19929	unshift @grammars, [ [ @grammar ], true ];
96						}
97	19362				132551	foreach my $grammar (@grammars) {
98	23807				69887	my $expanded = $grammar->[1];
99	23807				49764	my $length = $lengths{$keyword};
100	23807	100			52515	$length++ if $expanded;
101	23807				161850	foreach my $entry (@{$grammar->[0]}) {
	23807				47680
102	150886	100			1055278	my ($types, $expression) = $expanded ? @$entry : ($types_entry, $entry);
103	150886				1140132	my $definition = $expression->[0];
104	150886				286393	my $matched = false;
105	150886				421520	my $pos = 0;
106	150886				228409	my @indexes;
107						my $date_index;
108	150886				323789	for (my $i = 0; $i < @tokens; $i++) {
109	844683	100			5937274	next if $skip{$i};
110	494209	100			997613	last unless defined $types->[$pos];
111	494117	100			1113773	if ($self->_check_for_date($tokens[$i], $i, \$date_index)) {
112	9882				119225	next;
113						}
114	484235	100	66		8572874	if ($types->[$pos] eq 'SCALAR' && defined $definition->{$pos} && $tokens[$i] =~ /^$definition->{$pos}$/i
		100	100
		100	100
			66
			100
115						or $types->[$pos] eq 'REGEXP' && $tokens[$i] =~ $definition->{$pos}
116						&& (@indexes ? ($i - $indexes[-1] == 1) : true)
117						) {
118	8041				95662	$matched = true;
119	8041				27646	push @indexes, $i;
120	8041				22291	$pos++;
121						}
122						elsif ($matched) {
123	5371				39952	last;
124						}
125						}
126	150886	100	100		1165193	if ($matched
		100	100
127						&& @indexes == $length
128						&& (defined $date_index ? ($indexes[0] - $date_index == 1) : true)
129						) {
130	223	100			6555	my $expression = join ' ', (defined $date_index ? $tokens[$date_index] : (), @tokens[@indexes]);
131	223	100			777	my $start_index = defined $date_index ? $indexes[0] - 1 : $indexes[0];
132	223	100			833	my $type = $grammar_durations{$keyword} ? DURATION_TYPE : GRAMMAR_TYPE;
133	223				1421	push @expressions, [ [ $start_index, $indexes[-1] ], $expression, { flags => $type } ];
134	223	100			1522	$skip{$_} = true foreach (defined $date_index ? $date_index : (), @indexes);
135	223				1520	$seen_expression = true;
136	223				2005	last GRAMMAR;
137						}
138						}
139						}
140						}
141	439	100	100		7568	if (defined $date_index && !$seen_expression) {
142	7				171	push @expressions, [ [ ($date_index) x 2 ], $tokens[$date_index], { flags => DATE_TYPE } ];
143	7				32	$skip{$date_index} = true;
144	7				36	$seen_expression = true;
145						}
146						} while ($seen_expression);
147
148	209				2736	return $self->_finalize_expressions(\@expressions, \@tokens);
149						}
150
151						sub _extract_duration
152						{
153	97			97	260	my $self = shift;
154	97				317	my ($extract_string, $tokens, $expressions, $skip) = @_;
155
156	97				647	my $timespan_sep = $self->{data}->__timespan('literal');
157
158	97				1439	my @strings = grep /\S/, map trim($_), split /\b $timespan_sep \b/ix, $extract_string;
159	97	50			440	if (@strings) {
160	97				211	my $index = 0;
161	97				257	my @indexes;
162	97				277	foreach my $string (@strings) {
163	208				880	my @string_tokens = split /\s+/, $string;
164	208				621	push @indexes, [ $index, $index + $#string_tokens ];
165	208				510	$index += $#string_tokens + 1;
166	208		100		2004	$index++ while defined $tokens->[$index] && $tokens->[$index] =~ /^$timespan_sep$/i;
167						}
168						DURATION: {
169	97				236	for (my $i = 0; $i <= $#strings - 1; $i++) {
	160				665
170	179	100			829	next unless $extract_duration->($skip, \@indexes, $i);
171	111				1642	my $save_expression = false;
172	111				478	my @chunks;
173	111				337	foreach my $extract (qw(_first_to_last_extract _from_count_to_count_extract)) {
174	206	100			1987	if ($self->$extract($get_range->(\@strings, $i), $get_range->(\@indexes, $i), $tokens, \@chunks)) {
175	63				1421	$save_expression = true;
176	63				246	last;
177						}
178						}
179	111	100			1758	if ($save_expression) {
180	63				594	my $timespan_sep_index = $chunks[0]->[0][1] + 1;
181	63				284	my $expression = join ' ', ($chunks[0]->[1], $tokens->[$timespan_sep_index], $chunks[1]->[1]);
182	63				220	my @indexes = ($chunks[0]->[0][0], $chunks[1]->[0][1]);
183	63				331	push @$expressions, [ [ @indexes ], $expression, { flags => DURATION_TYPE } ];
184	63				309	$skip->{$_} = true foreach ($indexes[0] .. $indexes[1]);
185	63				1016	redo DURATION;
186						}
187						}
188						}
189						}
190						}
191
192						sub _finalize_expressions
193						{
194	209			209	561	my $self = shift;
195	209				523	my ($expressions, $tokens) = @_;
196
197	209				1668	my $timespan_sep = $self->{data}->__timespan('literal');
198	209				685	my (@duration_indexes, @final_expressions);
199
200	209				738	my $seen_duration = false;
201
202	209				1365	my @expressions = sort { $a->[0][0] <=> $b->[0][0] } @$expressions;
	97				596
203
204	209				905	for (my $i = 0; $i < @expressions; $i++) {
205	293				1097	my $expression = $expressions[$i];
206
207	293				920	my $prev = $expression->[0][0] - 1;
208	293				661	my $next = $expression->[0][1] + 1;
209
210	293	100	100		1932	if ($expression->[2]->{flags} & DATE_TYPE
		50
211						\|\| $expression->[2]->{flags} & GRAMMAR_TYPE
212						) {
213	224	100	100		858	if (!$seen_duration
		100	100
			100
			66
			100
			100
214						&& defined $tokens->[$next]
215						&& $tokens->[$next] =~ /^$timespan_sep$/i
216						&& defined $expressions[$i + 1]
217						&& ($expressions[$i + 1]->[2]->{flags} & DATE_TYPE
218						\|\| $expressions[$i + 1]->[2]->{flags} & GRAMMAR_TYPE)
219						&& $expressions[$i + 1]->[0][0] - $next == 1
220						) {
221	31				1288	push @duration_indexes, ($expression->[0][0] .. $expression->[0][1]);
222	31				129	$seen_duration = true;
223						}
224						elsif ($seen_duration) {
225	31				833	push @duration_indexes, ($prev, $expression->[0][0] .. $expression->[0][1]);
226	31				223	push @final_expressions, join ' ', @$tokens[@duration_indexes];
227	31				84	@duration_indexes = ();
228	31				104	$seen_duration = false;
229						}
230						else {
231	162				4702	push @final_expressions, $expression->[1];
232						}
233						}
234						elsif ($expression->[2]->{flags} & DURATION_TYPE) {
235	69				272	push @final_expressions, $expression->[1];
236						}
237						}
238
239	209			262	1588	my $exclude = sub { $_[0] =~ /^\d{1,2}$/ };
	262				7952
240
241	209				811	return grep !$exclude->($_), @final_expressions;
242						}
243
244						sub _check_for_date
245						{
246	495202			495202	779118	my $self = shift;
247	495202				997528	my ($token, $index, $date_index) = @_;
248
249	495202				2177804	my ($formatted) = $token =~ $self->{data}->__regexes('format');
250	495202				1453224	my %count = $self->_count_separators($formatted);
251	495202	100			1148686	if ($self->_check_formatted('ymd', \%count)) {
252	9898				20044	$$date_index = $index;
253	9898				25903	return true;
254						}
255						else {
256	485304				1169090	return false;
257						}
258						}
259
260						1;
261						__END__
262
263						=head1 NAME
264
265						DateTime::Format::Natural::Extract - Extract parsable expressions from strings
266
267						=head1 SYNOPSIS
268
269						Please see the DateTime::Format::Natural documentation.
270
271						=head1 DESCRIPTION
272
273						C<DateTime::Format::Natural::Extract> extracts expressions from strings to be
274						processed by the parse methods.
275
276						=head1 SEE ALSO
277
278						L<DateTime::Format::Natural>
279
280						=head1 AUTHOR
281
282						Steven Schubiger <schubiger@cpan.org>
283
284						=head1 LICENSE
285
286						This program is free software; you may redistribute it and/or
287						modify it under the same terms as Perl itself.
288
289						See L<http://dev.perl.org/licenses/>
290
291						=cut