File Coverage

blib/lib/Date/Extract/Surprise.pm
Criterion Covered Total %
statement 51 51 100.0
branch 14 22 63.6
condition 5 12 41.6
subroutine 11 11 100.0
pod 3 3 100.0
total 84 99 84.8


line stmt bran cond sub pod time code
1 1     1   42401 use strict;
  1         3  
  1         30  
2 1     1   5 use warnings;
  1         2  
  1         41  
3             package Date::Extract::Surprise;
4             BEGIN {
5 1     1   18 $Date::Extract::Surprise::VERSION = '0.006';
6             }
7             BEGIN {
8 1     1   18 $Date::Extract::Surprise::DIST = 'Date-Extract-Surprise';
9             }
10             # ABSTRACT: extract probable dates from strings *with surprises*
11              
12 1     1   4 use Carp qw( croak );
  1         2  
  1         47  
13 1     1   4 use Scalar::Util qw( blessed );
  1         2  
  1         95  
14             # just trying to be helpful.
15             use Exporter::Easy (
16 1         5 OK => [qw( extract_datetimes )],
17 1     1   750 );
  1         1362  
18              
19 1     1   1105 use DateTime::Format::Flexible qw();
  1         278705  
  1         974  
20              
21              
22             sub new {
23 5     5 1 30 my $class = shift;
24 5         28 my $self =
25             bless {
26             DEBUG => 0,
27             @_,
28             },
29             $class;
30              
31 5         13 return $self;
32             }
33              
34              
35              
36             sub extract {
37 6 50   6 1 2208 return unless @_;
38              
39             # can be called as an object method, class method, or function
40             # there's probably better ways to support this.
41 6 50 66     87 my $self = blessed( $_[0] ) && $_[0]->isa( __PACKAGE__ ) ? shift
    100          
42             : $_[0] eq __PACKAGE__ ? shift->new()
43             : croak "Please call as a class or object method!\n";
44              
45 6         14 my $text = shift;
46              
47 6         13 my %args = @_;
48              
49             # set a base date for ambiguous DTs we find, default to epoch.
50             # if a string value is passed and can't be parsed, croak.
51 6 50 33     89 my $base = blessed( $args{base} ) && $args{base}->isa( 'DateTime' ) ? delete $args{base}
    50          
52             : defined $args{base} ? DateTime::Format::Flexible->parse_datetime( $args{base} )
53             : DateTime->new( year => 1970, month => 1, day => 1 );
54              
55 6         1647 my @timestamps; # populate this
56              
57             # there's no immediate need to split into lines, but it should make
58             # some future features easier (like reporting which lines matched)
59 6         28 for my $line ( split /[\n\r]+/, $text ) {
60              
61 6 50       25 warn " {$line}\n" if $self->{DEBUG} > 0;
62              
63             # split it into terms and remove chars that may trip us up
64 6         30 my @terms = map { (my $s = $_) =~ s/[,]/ /g; $s } split q[ ], $line;
  69         106  
  69         125  
65              
66 6         27 for my $i ( 0 .. $#terms ) {
67 69         405092 for my $j ( $i .. $#terms ) {
68 450         1559841 my $search_str = join ' ', @terms[$i .. $j];
69              
70             # clean up other crap that DT::F::F chokes on?
71 450         1604 $search_str =~ s/at//ig;
72              
73             # clean up whitespace
74 450         2159 $search_str =~ s/(\s){2,}/$1/g;
75 450         2822 $search_str =~ s/^\s+|\s+$//g;
76              
77             # it almost certainly has some *numbers* in it!
78 450 100       1852 next unless $search_str =~ /\d/;
79              
80 276 50       1044 warn " {$search_str}\n" if $self->{DEBUG} > 1;
81              
82             # if we can't determine the *date*, assume epoch
83 276         1242 DateTime::Format::Flexible->base( $base );
84              
85 276 100       13229 next unless my $dt = eval {
86 276         1184 DateTime::Format::Flexible->parse_datetime( $search_str );
87             };
88              
89 36         158103 push @timestamps, $dt;
90             }
91             }
92             }
93              
94 6         144 return @timestamps;
95             }
96              
97              
98              
99             sub extract_datetimes {
100 2 50   2 1 1428 return unless @_;
101              
102             # can be called as an object method, class method, or function
103             # there's probably better ways to support this.
104 2 50 33     27 croak "This is a function. Use extract() if you need a method!\n"
      33        
105             if ( blessed( $_[0] ) && $_[0]->isa( __PACKAGE__ ) ) or
106             ( $_[0] eq __PACKAGE__ );
107              
108 2         15 my $self = __PACKAGE__->new();
109              
110 2         7 return $self->extract( @_ );
111             }
112              
113             1 || q{life without coffee isn't worth living}; #truth
114              
115              
116             =pod
117              
118             =head1 NAME
119              
120             Date::Extract::Surprise - extract probable dates from strings *with surprises*
121              
122             =head1 VERSION
123              
124             version 0.006
125              
126             =head1 SYNOPSIS
127              
128             use Date::Extract::Surprise;
129             my $des = Date::Extract::Surprise->new();
130             my @datetimes = $des->extract( $arbitrary_text );
131              
132             # or...
133             use Date::Extract::Surprise;
134             my @datetimes = Date::Extract::Surprise->extract( $arbitrary_text );
135              
136             # or...
137             use Date::Extract::Surprise qw( extract_datetimes );
138             my @datetimes = extract_datetimes( $arbitrary_text );
139              
140             =head1 DESCRIPTION
141              
142             This is modeled on Sartak's excellent L, a proven
143             and capable module that you can use to extract references to dates
144             and times from otherwise arbitrary text. For example:
145              
146             "The package will be delivered at 3:15 PM, March 15, 2007, on the dot."
147              
148             Upon parsing that, you should end up with a L object
149             representing March 15, 2007 at 3:15PM in your timezone.
150              
151             L is designed to try to minimize "false-positives"
152             (ie. detecting things that *aren't* actually dates or times), but
153             at the expense of potentially missing some dates. As its
154             documentation states, "I welcome here.>"
155              
156             Because I had the I need - to find dates in strings I
157             if some were going to be bogus>, I created L
158             which will gladly detect anything that even I like
159             it could be a date or time.
160              
161             B at least I of the dates this will 'detect' in some
162             text should be what you wanted. It's up to you to figure out which one
163             that is! :-)
164              
165             =head1 METHODS
166              
167             =head2 new
168              
169             Just your basic object constructor.
170              
171             my $des = Date::Extract::Surprise->new();
172              
173             Currently takes only one argument:
174              
175             =head2 extract
176              
177             This is designed to (more or less) mirror the interface of
178             Date::Extract->extract(). However, at this time, it supports
179             almost none of its namesake's extra options, and adds one
180             additional option.
181              
182             This can be called as either a class method or as a method on
183             an object, as seen in the L.
184              
185             =head1 FUNCTIONS
186              
187             =head2 extract_datetimes
188              
189             If you're old-skool and prefer things to export a function, you can have it.
190              
191             It takes the same arguments as the L method and returns the same values.
192              
193             =for :list = DEBUG
194             integer greater than 0 for debugging level. higher numbers
195             give more detail
196              
197             It will probably take more in the future.
198              
199             =head1 SEE ALSO
200              
201             =for :list * Date::Extract
202             * DateTime::Format::Flexible
203             * Time::ParseDate
204             * Date::Manip
205              
206             =head1 NOTES
207              
208             Yes, this code is slow and dumb, but it helped me solve a problem and
209             I hope it may help others, too. Let me know if you need anything changed!
210              
211             I'm hoping this will work on perl 5.6 and before, because I want
212             to be helpful to as many people as possible, but I am too lazy
213             to test it myself. Bug reports and/or patches please!
214              
215             =head1 TODO
216              
217             =for :list * more test strings in the tests
218             * support more options from L
219             * more rigorous tests beyond basic functionality
220             * eat a sandwich
221              
222             =head1 AUTHOR
223              
224             Stephen R. Scaffidi
225              
226             =head1 COPYRIGHT AND LICENSE
227              
228             This software is copyright (c) 2010 by Stephen R. Scaffidi.
229              
230             This is free software; you can redistribute it and/or modify it under
231             the same terms as the Perl 5 programming language system itself.
232              
233             =cut
234              
235              
236             __END__