File Coverage

blib/lib/DateTime/Event/Predict.pm
Criterion Covered Total %
statement 7 9 77.7
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 10 12 83.3


line stmt bran cond sub pod time code
1              
2             #==================================================================== -*-perl-*-
3             #
4             # DateTime::Event::Predict
5             #
6             # DESCRIPTION
7             # Predict new dates from a set of dates
8             #
9             # AUTHORS
10             # Brian Hann
11             #
12             #===============================================================================
13              
14             package DateTime::Event::Predict;
15              
16 2     2   71435 use 5.006;
  2         9  
  2         73  
17              
18 2     2   11 use strict;
  2         3  
  2         76  
19              
20 2     2   2249 use DateTime;
  0            
  0            
21             use Params::Validate qw(:all);
22             use Carp qw(carp croak confess);
23             use Scalar::Util;
24             use Data::Dumper;
25              
26             use POSIX qw(ceil);
27              
28             use DateTime::Event::Predict::Profile qw(:buckets);
29              
30             our $VERSION = '0.01_03';
31              
32              
33             #===============================================================================#
34              
35             sub new {
36             my $proto = shift;
37            
38             my %opts = validate(@_, {
39             dates => { type => ARRAYREF, optional => 1 },
40             profile => { type => SCALAR | OBJECT | HASHREF, optional => 1 },
41             #stdev_limit => { type => SCALAR, default => 2 },
42             });
43            
44             my $class = ref( $proto ) || $proto;
45             my $self = { #Will need to allow for params passed to constructor
46             dates => [],
47             distinct_buckets => {},
48             interval_buckets => {},
49             total_epoch_interval => 0,
50             largest_epoch_interval => 0,
51             smallest_epoch_interval => 0,
52             mean_epoch_interval => 0,
53            
54             #Whether this data set has been trained or not
55             trained => 0,
56             };
57             bless($self, $class);
58            
59             $opts{profile} = 'default' if ! $opts{profile};
60            
61             $self->profile( $opts{profile} );
62            
63             return $self;
64             }
65              
66             # Get or set list of dates
67             # ***NOTE: Should make this validate for 'can' on the DateTime methods we need and on 'isa' for DateTime
68             sub dates {
69             my $self = shift;
70             my ($dates) = @_;
71            
72             validate_pos(@_, { type => ARRAYREF, optional => 1 });
73            
74             if (! defined $dates) {
75             return wantarray ? @{$self->{dates}} : $self->{dates};
76             }
77             elsif (defined $dates) {
78             foreach my $date (@$dates) {
79             $self->_trim_date( $date );
80             $self->add_date($date);
81             }
82             }
83            
84             return 1;
85             }
86              
87             # Add a date to the list of dates
88             sub add_date {
89             my $self = shift;
90             my ($date) = @_;
91            
92             validate_pos(@_, { isa => 'DateTime' }); #***Or we could attempt to parse the date, or use can( epoch() );
93            
94             $self->_trim_date( $date );
95            
96             push(@{ $self->{dates} }, $date);
97            
98             return 1;
99             }
100              
101             #Get or set the profile for this predictor
102             sub profile {
103             my $self = shift;
104             my ($profile) = @_; # $profile can be a string specifying a profile name that is provided by default, or a profile object, or options to create a new profile
105            
106             validate_pos(@_, { type => SCALAR | OBJECT | HASHREF, optional => 1 });
107            
108             # If no profile is provided, return the current profile
109             if (! defined $profile || ! $profile) { return $self->{profile}; }
110            
111             my $new_profile;
112            
113             # Profile is an actual DTP::Profile object
114             if (Scalar::Util::blessed($profile) && $profile->can('buckets')) {
115             $new_profile = $profile;
116             }
117             # Profile is a hashref of options to create a new DTP::Profile object with
118             elsif (ref($profile) eq 'HASH') {
119             $new_profile = DateTime::Event::Predict::Profile->new(
120             %$profile,
121             );
122             }
123             # Profile is the name of a profile alias
124             else {
125             $new_profile = DateTime::Event::Predict::Profile->new( profile => $profile );
126             }
127            
128             # Add the distinct buckets
129             foreach my $bucket ( $new_profile->_distinct_buckets() ) {
130             $self->{distinct_buckets}->{ $bucket->name } = {
131             accessor => $bucket->accessor,
132             duration => $bucket->duration,
133             order => $bucket->order,
134             weight => $bucket->weight,
135             buckets => {},
136             };
137             }
138            
139             # Add the interval buckets
140             foreach my $bucket ( $new_profile->_interval_buckets() ) {
141             $self->{interval_buckets}->{ $bucket->name } = {
142             accessor => $bucket->accessor,
143             order => $bucket->order,
144             weight => $bucket->weight,
145             buckets => {},
146             };
147             }
148            
149             $self->{profile} = $new_profile;
150            
151             return 1;
152             }
153              
154             # Gather statistics about the dates
155             sub train {
156             my $self = shift;
157            
158             # Sort the dates chronologically
159             my @dates = sort { $a->hires_epoch() <=> $b->hires_epoch() } @{ $self->{dates} }; #*** Need to convert this to DateTime->compare($dt1, $dt2)
160            
161             # Last and first dates
162             $self->{last_date} = $dates[$#dates];
163             $self->{first_date} = $dates[0];
164            
165             # Clear out anything already in the the buckets
166             foreach my $bucket (values %{$self->{distinct_buckets}}, values %{$self->{interval_buckets}} ) {
167             $bucket->{buckets} = {};
168             }
169            
170             my $prev_date;
171             foreach my $index (0 .. $#{ $self->{dates} }) {
172             # The date to work on
173             my $date = $dates[ $index ];
174            
175             # Get which dates were before and after the date we're working on
176             my ($before, $after);
177             if ($index > 0) { $before = $dates[ $index - 1 ]; }
178             if ($index < $#{ $self->{dates} }) { $after = $dates[ $index + 1 ]; }
179            
180             # Increment the date-part buckets
181             while (my ($name, $dbucket) = each %{ $self->{distinct_buckets} }) {
182             # Get the accessor method by using can()
183             my $cref = $date->can( $dbucket->{accessor} );
184             croak "Can't call accessor '" . $dbucket->{accessor} . "' on " . ref($date) . " object" unless $cref;
185            
186             # Increment the number of instances for the value given when we use this bucket's accessor on $date
187             $dbucket->{buckets}->{ &$cref($date) }++;
188             }
189            
190             # If this is the first date we have nothing to diff, so we'll skip on to the next one
191             if (! $prev_date) { $prev_date = $date; next; }
192            
193             # Get a DateTime::Duration object representing the diff between the dates
194             my $dur = $date->subtract_datetime( $prev_date );
195            
196             # Increment the interval buckets
197             # Intervals: here we default to the largest interval that we can see. So, for instance, if
198             # there is a difference of months we will not increment anything smaller than that.
199             while (my ($name, $bucket) = each %{ $self->{interval_buckets} }) {
200             my $cref = $dur->can( $bucket->{accessor} );
201             croak "Can't call accessor '" . $bucket->{accessor} . "' on " . ref($dur) . " object" unless $cref;
202             my $interval = &$cref($dur);
203             $bucket->{buckets}->{ $interval }++;
204             }
205            
206             # Add the difference between dates in epoch seconds
207             my $epoch_interval = $date->hires_epoch() - $prev_date->hires_epoch();
208            
209             ### Epoch interval: $epoch_interval
210            
211             $self->{total_epoch_interval} += $epoch_interval;
212            
213             # Set the current date to this date
214             $prev_date = $date;
215             }
216            
217             # Average interval between dates in epoch seconds
218             $self->{mean_epoch_interval} = $self->{total_epoch_interval} / (scalar @dates - 1); #Divide total interval by number of intervals
219            
220             # Mark this object as being trained
221             $self->{trained}++;
222             }
223              
224             sub predict {
225             my $self = shift;
226            
227             my %opts = validate(@_, {
228             max_predictions => { type => SCALAR, optional => 1 }, # How many predictions to return
229             stdev_limit => { type => SCALAR, default => 2 }, # Number of standard deviations to search through, default to 2
230             min_date => { isa => 'DateTime', optional => 1 }, # If set, make no prediction before 'min_date'
231             callbacks => { type => ARRAYREF, optional => 1 }, # Arrayref of coderefs to call when making predictions
232             });
233            
234             # Force max predictions to one if we were called in scalar context
235             if (! defined $opts{'max_predictions'}) {
236             $opts{'max_predictions'} = 1 if ! wantarray;
237             }
238            
239             # Train this set of dates if they're not already trained
240             $self->train if ! $self->_is_trained;
241            
242             # Make a copy of the distinct and interval bucket hashes so we can mess with them
243             my %distinct_buckets = %{ $self->{distinct_buckets} };
244             my %interval_buckets = %{ $self->{interval_buckets} };
245            
246             # Figure the mean, variance, and standard deviation for each bucket
247             foreach my $bucket (values %distinct_buckets, values %interval_buckets) {
248             my ($mean, $variance, $stdev) = $self->_bucket_statistics($bucket);
249            
250             $bucket->{mean} = $mean;
251             $bucket->{variance} = $variance;
252             $bucket->{stdev} = $stdev;
253             }
254            
255             # Get the most recent of the provided dates by sorting them by their epoch seconds
256             my $most_recent_date = (sort { $b->hires_epoch() <=> $a->hires_epoch() } @{ $self->{dates} })[0];
257            
258             # Make a starting search date that has been moved ahead by the average interval beteween dates (in epoch seconds)
259             my $duration = new DateTime::Duration(
260             seconds => $self->{mean_epoch_interval}, # **Might need to round off hires second info here?
261             );
262             my $start_date = $most_recent_date + $duration;
263            
264             # A hash of predictions, dates are keyed by their hires_epoch() value
265             my %predictions = ();
266            
267             # Start with using the distinct buckets to make predictions
268             if (%distinct_buckets) {
269             # Get a list of buckets after sorting the buckets from largest date part to smallest (i.e. year->month->day->hour ... microsecond, etc)
270             my @distinct_bucket_keys = sort { $self->{distinct_buckets}->{ $b }->{order} <=> $self->{distinct_buckets}->{ $a }->{order} } keys %distinct_buckets;
271            
272             # Get the first bucket name
273             my $first_bucket_name = shift @distinct_bucket_keys;
274            
275             # Start recursively descending down into the various date parts, searching in each one
276             $self->_date_descend_distinct(
277             %opts,
278            
279             date => $start_date,
280             most_recent_date => $most_recent_date,
281             bucket_name => $first_bucket_name,
282             distinct_buckets => \%distinct_buckets,
283             distinct_bucket_keys => \@distinct_bucket_keys,
284             predictions => \%predictions,
285             );
286            
287             # Now that we (hopefully) have some predictions, put them each through _interval_check to check
288             # the predictiosn against the interval bucket statistics
289             if (%interval_buckets) {
290             while (my ($hires, $prediction) = each %predictions) {
291             # Delete the date from the predictions hash if it's not good according to the interval statistics
292             if (! $self->_interval_check( $prediction )) {
293             delete $predictions{ $hires };
294             }
295             }
296             }
297             }
298             # No distinct buckets, just interval buckets
299             elsif (%interval_buckets) {
300             # Get a list of buckets after sorting the buckets from largest interval to smallest (i.e. years->months->days->hours, etc)
301             my @interval_bucket_keys = sort { $self->{interval_buckets}->{ $b }->{order} <=> $self->{interval_buckets}->{ $a }->{order} } keys %interval_buckets;
302            
303             # Get the first bucket name
304             my $first_bucket_name = shift @interval_bucket_keys;
305            
306             # Start recursively descending down into the date interval types, searching in each one
307             $self->_date_descend_interval(
308             %opts,
309            
310             date => $start_date,
311             most_recent_date => $most_recent_date,
312             bucket_name => $first_bucket_name,
313             interval_buckets => \%interval_buckets,
314             interval_bucket_keys => \@interval_bucket_keys,
315             predictions => \%predictions,
316             );
317             }
318             # WTF, no buckets. That's bad!
319             else {
320             croak("No buckets supplied!");
321             }
322            
323             # Sort the predictions by their total deviation
324             my @predictions = sort { $a->{_dtp_deviation} <=> $b->{_dtp_deviation} } values %predictions;
325            
326             return wantarray ? @predictions : $predictions[0];
327             }
328              
329             # Descend down into the distinct date parts, looking for predictions
330             sub _date_descend_distinct {
331             my $self = shift;
332             #my %opts = @_;
333            
334             # Validate the options
335             my %opts = validate(@_, {
336             date => { isa => 'DateTime' }, # The date to start searching in
337             most_recent_date => { isa => 'DateTime' }, # The most recent date of the dates provided
338             bucket_name => { type => SCALAR }, # The bucket (date-part) to start searching in
339             distinct_buckets => { type => HASHREF }, # A hashref of all buckets to use when looking for good predictions
340             distinct_bucket_keys => { type => ARRAYREF }, # A list of bucket names that we shift out of to get the next bucket to use
341             stdev_limit => { type => SCALAR }, # The limit of how many standard deviations to search through
342             predictions => { type => HASHREF }, # A hashref of predictions we find
343             max_predictions => { type => SCALAR, optional => 1 }, # The maxmimum number of predictions to return (prevents overly long searches)
344             min_date => { isa => 'DateTime', optional => 1 }, # If set, make no prediction before 'min_date'
345             callbacks => { type => ARRAYREF, optional => 1 }, # A list of custom coderefs that are called on each possible prediction
346             });
347            
348             # Copy the options over into simple scalars so it's easier on my eyes
349             my $date = delete $opts{'date'}; # Delete these ones out as we'll be overwriting them below
350             my $bucket_name = delete $opts{'bucket_name'};
351             my $distinct_buckets = $opts{'distinct_buckets'};
352             my $distinct_bucket_keys = $opts{'distinct_bucket_keys'};
353             my $stdev_limit = $opts{'stdev_limit'};
354             my $predictions = $opts{'predictions'};
355             my $max_predictions = $opts{'max_predictions'};
356             my $callbacks = $opts{'callbacks'};
357            
358             # We've reached our max number of predictions, return
359             return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
360            
361             # Get the actual bucket hash for this bucket name
362             my $bucket = $distinct_buckets->{ $bucket_name };
363            
364             # The search range is the standard deviation multiplied by the number of standard deviations to search through
365             my $search_range = ceil( $bucket->{stdev} * $stdev_limit );
366            
367             #The next bucket to search down into
368             my $next_bucket_name = "";
369             if (scalar @$distinct_bucket_keys > 0) {
370             $next_bucket_name = shift @$distinct_bucket_keys;
371             }
372            
373             foreach my $search_inc ( 0 .. $search_range ) {
374             # Make an inverted search increment so we can search backwards
375             my $neg_search_inc = $search_inc * -1;
376            
377             # Put forwards and backwards in the searches
378             my @searches = ($search_inc, $neg_search_inc);
379            
380             # Make sure we only search on 0 once (i.e. 0 * -1 == 0)
381             @searches = (0) if $search_inc == 0;
382            
383             foreach my $increment (@searches) {
384             # We've reached our max number of predictions, return
385             return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
386            
387             # Make a duration object using the accessor for this bucket
388             my $duration_increment = new DateTime::Duration( $bucket->{duration} => $increment );
389            
390             # Get the new date
391             my $new_date = $date + $duration_increment;
392            
393             # Trim the date down to just the date parts we care about
394             $self->_trim_date( $new_date );
395            
396             # Skip this date if it's before or on the most recent date
397             if (DateTime->compare( $new_date, $opts{'most_recent_date'} ) <= 0) { # New date is before the most recent one, or is same as most recent one
398             next;
399             }
400            
401             # Skip this date if the "min_date" option is set, and it's before or on that date
402             if ($opts{'min_date'} && DateTime->compare($new_date, $opts{'min_date'}) <= 0) {
403             next;
404             }
405            
406             # If we have no more buckets to search into, determine if this date is a good prediction
407             if (! $next_bucket_name) {
408             if ($self->_distinct_check( %opts, date => $new_date )) {
409             $predictions->{ $new_date->hires_epoch() } = $new_date;
410             }
411             }
412             #If we're not at the smallest bucket, keep searching!
413             else {
414             $self->_date_descend_distinct(
415             %opts,
416             date => $new_date,
417             bucket_name => $next_bucket_name,
418             );
419             }
420             }
421             }
422            
423             return 1;
424             }
425              
426             # Descend down into the date intervals, looking for predictions
427             sub _date_descend_interval {
428             my $self = shift;
429            
430             # Validate the options
431             my %opts = validate(@_, {
432             date => { isa => 'DateTime' }, # The date to start searching in
433             most_recent_date => { isa => 'DateTime' }, # The most recent date of the dates provided
434             bucket_name => { type => SCALAR }, # The bucket (date-part) to start searching in
435             interval_buckets => { type => HASHREF }, # A hashref of all buckets to use when looking for good predictions
436             interval_bucket_keys => { type => ARRAYREF }, # A list of bucket names that we shift out of to get the next bucket to use
437             stdev_limit => { type => SCALAR }, # The limit of how many standard deviations to search through
438             predictions => { type => HASHREF }, # A hashref of predictions we find
439             max_predictions => { type => SCALAR, optional => 1 }, # The maxmimum number of predictions to return (prevents overly long searches)
440             min_date => { isa => 'DateTime', optional => 1 }, # If set, make no prediction before 'min_date'
441             callbacks => { type => ARRAYREF, optional => 1 }, # A list of custom coderefs that are called on each possible prediction
442             });
443            
444             # Copy the options over into simple scalars so it's easier on my eyes
445             my $date = delete $opts{'date'}; # Delete these ones out as we'll be overwriting them below
446             my $bucket_name = delete $opts{'bucket_name'};
447             my $interval_buckets = $opts{'interval_buckets'};
448             my $interval_bucket_keys = $opts{'interval_bucket_keys'};
449             my $stdev_limit = $opts{'stdev_limit'};
450             my $predictions = $opts{'predictions'};
451             my $max_predictions = $opts{'max_predictions'};
452             my $callbacks = $opts{'callbacks'};
453            
454             # We've reached our max number of predictions, return
455             return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
456            
457             # Get the actual bucket hash for this bucket name
458             my $bucket = $interval_buckets->{ $bucket_name };
459            
460             # The search range is the standard deviation multiplied by the number of standard deviations to search through
461             my $search_range = ceil( $bucket->{stdev} * $stdev_limit );
462            
463             #The next bucket to search down into
464             my $next_bucket_name = "";
465             if (scalar @$interval_bucket_keys > 0) {
466             $next_bucket_name = shift @$interval_bucket_keys;
467             }
468            
469             foreach my $search_inc ( 0 .. $search_range ) {
470             # Make an inverted search increment so we can search backwards
471             my $neg_search_inc = $search_inc * -1;
472            
473             # Put forwards and backwards in the searches
474             my @searches = ($search_inc, $neg_search_inc);
475            
476             # Make sure we only search on 0 once (i.e. 0 * -1 == 0)
477             @searches = (0) if $search_inc == 0;
478            
479             foreach my $increment (@searches) {
480             # We've reached our max number of predictions, return
481             return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;
482            
483             # Make a duration object using the accessor for this bucket
484             my $duration_increment = new DateTime::Duration( $bucket->{accessor} => $increment );
485            
486             # Get the new date
487             my $new_date = $date + $duration_increment;
488            
489             # Trim the date down to just the date parts we care about
490             $self->_trim_date( $new_date );
491            
492             # Skip this date if it's before or on the most recent date
493             if (DateTime->compare( $new_date, $opts{'most_recent_date'} ) <= 0) { # New date is before the most recent one, or is same as most recent one
494             next;
495             }
496            
497             # Skip this date if the "min_date" option is set, and it's before or on that date
498             if ($opts{'min_date'} && DateTime->compare($new_date, $opts{'min_date'}) <= 0) {
499             next;
500             }
501            
502             # If we have no more buckets to search into, determine if this date is a good prediction
503             if (! $next_bucket_name) {
504             if ($self->_interval_check( %opts, date => $new_date )) {
505             $predictions->{ $new_date->hires_epoch() } = $new_date;
506             }
507             }
508             #If we're not at the smallest bucket, keep searching!
509             else {
510             $self->_date_descend_interval(
511             %opts,
512             date => $new_date,
513             bucket_name => $next_bucket_name,
514             );
515             }
516             }
517             }
518            
519             return 1;
520             }
521              
522             # Check to see if a given date is good according to the supplied distinct buckets by going through each bucket
523             # and comparing this date's deviation from that bucket's mean. If it is within the standard deviation for
524             # each bucket then consider it a good match.
525             sub _distinct_check {
526             my $self = shift;
527            
528             # Temporarily allow extra options
529             validation_options( allow_extra => 1 );
530             my %opts = validate(@_, {
531             date => { isa => 'DateTime' }, # The date to check
532             distinct_buckets => { type => HASHREF }, # List of enabled buckets
533             callbacks => { type => ARRAYREF, optional => 1 }, # A list of custom coderefs that are called on each possible prediction
534             });
535             validation_options( allow_extra => 0 );
536            
537             my $date = $opts{'date'};
538             my $distinct_buckets = $opts{'distinct_buckets'};
539             my $callbacks = $opts{'callbacks'};
540            
541             my $good = 1;
542             my $date_deviation = 0;
543             foreach my $bucket (values %$distinct_buckets) {
544             # Get the value for this bucket's access for the $new_date
545             my $cref = $date->can( $bucket->{accessor} );
546             my $datepart_val = &$cref($date);
547            
548             # If the deviation of this datepart from the mean is within the standard deviation,
549             # this date ain't good.
550            
551             my $deviation = abs($datepart_val - $bucket->{mean});
552             $date_deviation += $deviation;
553            
554             if ($deviation > $bucket->{stdev} ) {
555             $good = 0;
556             last;
557             }
558             }
559            
560             # All the dateparts were within their standard deviations, check for callbacks and push this date into the set of predictions
561             if ($good == 1) {
562             # Stick the date's total deviation into the object so it can be used for sorting in predict()
563             $date->{_dtp_deviation} += $date_deviation;
564            
565             # Run each hook we were passed
566             foreach my $callback (@$callbacks) {
567             # If any hook returns false, this date is a no-go and we can stop processing it
568             if (! &$callback($date)) {
569             $good = 0;
570             last;
571             }
572             }
573            
574             # If the date is still considered good, return true
575             if ($good == 1) {
576             return 1;
577             }
578             # Otherwise return false
579             else {
580             return 0;
581             }
582             }
583             }
584              
585             # Check to see if a given date is good according to the supplied interval buckets by going through each bucket
586             # and comparing this date's deviation from that bucket's mean. If it is within the standard deviation for
587             # each bucket then consider it a good match.
588             sub _interval_check {
589             my $self = shift;
590            
591             # Temporarily allow extra options
592             validation_options( allow_extra => 1 );
593             my %opts = validate(@_, {
594             date => { isa => 'DateTime' }, # The date prediction to check
595             most_recent_date => { isa => 'DateTime' }, # The most recent date of the dates provided
596             interval_buckets => { type => HASHREF }, # List of enabled interval buckets
597             callbacks => { type => ARRAYREF, optional => 1 }, # A list of custom coderefs that are called on each possible prediction
598             });
599             validation_options( allow_extra => 0 );
600            
601             my $date = $opts{'date'};
602             my $most_recent_date = $opts{'most_recent_date'};
603             my $interval_buckets = $opts{'interval_buckets'};
604             my $callbacks = $opts{'callbacks'};
605            
606             # Flag specifying whether the predicted date is "good" (within the standard deviation) or not
607             my $good = 1;
608            
609             # Total deviation of the predicted date from each of the bucket standard deviations
610             my $date_deviation = 0;
611            
612             # Get a duration object for the span between the most recent date supplied and the predicted date
613             my $dur = $date->subtract_datetime( $most_recent_date );
614            
615             foreach my $bucket (values %$interval_buckets) {
616             my $cref = $dur->can( $bucket->{accessor} );
617             croak "Can't call accessor '" . $bucket->{accessor} . "' on " . ref($dur) . " object" unless $cref;
618             my $interval = &$cref($dur);
619            
620             my $deviation = abs($interval - $bucket->{mean});
621             $date_deviation += $deviation;
622            
623             if ($deviation > $bucket->{stdev} ) {
624             $good = 0;
625             last;
626             }
627             }
628            
629             # All the dateparts were within their standard deviations, check for callbacks and push this date into the set of predictions
630             if ($good == 1) {
631             # Stick the date's total deviation into the object so it can be used for sorting in predict()
632             $date->{_dtp_deviation} += $date_deviation;
633            
634             # Run each hook we were passed
635             foreach my $callback (@$callbacks) {
636             # If any hook returns false, this date is a no-go and we can stop processing it
637             if (! &$callback($date)) {
638             $good = 0;
639             last;
640             }
641             }
642            
643             # If the date is still considered good, return true
644             if ($good == 1) {
645             return 1;
646             }
647             # Otherwise return false
648             else {
649             return 0;
650             }
651             }
652             }
653              
654             # Get the mean, variance, and standard deviation for a bucket
655             sub _bucket_statistics {
656             my $self = shift;
657             my $bucket = shift;
658            
659             my $total = 0;
660             my $count = 0;
661             while (my ($value, $occurances) = each %{ $bucket->{buckets} }) {
662             # Gotta loop for each time the value has been found, incrementing the total by the value
663             for (1 .. $occurances) {
664             $total += $value;
665             $count++;
666             }
667             }
668            
669             my $mean = $total / $count;
670            
671             # Get the variance
672             my $total_variance = 0;
673             while (my ($value, $occurances) = each %{ $bucket->{buckets} }) {
674             # Gotta loop for each time the value has been found
675             my $this_variance = ($value - $mean) ** 2;
676            
677             $total_variance += $this_variance * $occurances;
678             }
679            
680             my $variance = $total_variance / $count;
681             my $stdev = sqrt($variance);
682            
683             return ($mean, $variance, $stdev);
684             }
685              
686             # Whether this instance has been trained by train() or not
687             sub _is_trained {
688             my $self = shift;
689            
690             return ($self->{trained} > 0) ? 1 : 0;
691             }
692              
693             # Utility method to print out the dates added to this instance
694             sub _print_dates {
695             my $self = shift;
696            
697             foreach my $date (sort { $a->hires_epoch() <=> $b->hires_epoch() } @{ $self->{dates} }) {
698             print $date->mdy('/') . ' ' . $date->hms . "\n";
699             }
700             }
701              
702             # Trim the date parts that are smaller than the smallest one we care about. If we only care about
703             # the year, month, and day, and during the initial search create an offset date that has an hour
704             # or minute that is off from the most recent given date, then when we do a comparison to see if
705             # we're predicting a date we've already been given it's possible that we could have that same
706             # date, just with the hour and second set forward a bit.
707             sub _trim_dates {
708             my $self = shift;
709             my (@dates) = @_;
710            
711             # Get the smallest bucket we have turned on
712             my @buckets = (sort { $a->order <=> $b->order } grep { $_->on && $_->trimmable } $self->profile->buckets)[0];
713             my $smallest_bucket = $buckets[0];
714            
715             return if ! defined $smallest_bucket || ! $smallest_bucket || ! @buckets;
716            
717             foreach my $date (@dates) {
718             confess "Can't trim a non-DateTime value" unless $date->isa( 'DateTime' );
719            
720             #foreach my $bucket (grep { $_->trimmable && ($_->order < $smallest_bucket->order) } values %DateTime::Event::Predict::Profile::BUCKETS) {
721             foreach my $bucket (grep { $_->order < $smallest_bucket->order } values %DISTINCT_BUCKETS) {
722             # Clone the date so we don't modify anything we shouldn't
723             $date->clone->truncate( to => $smallest_bucket->accessor );
724             }
725             }
726             }
727              
728             # Useless syntactic sugar
729             sub _trim_date { return &_trim_dates(@_); }
730              
731             1; # End of DateTime::Event::Predict
732            
733             __END__