#==================================================================== -*-perl-*-
#
# DateTime::Event::Predict
#
# DESCRIPTION
#   Predict new dates from a set of dates
#
# AUTHORS
#   Brian Hann
#
#===============================================================================
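
# Usage sketch (illustrative; not from the original documentation). It assumes
# the 'default' profile and exercises only the public methods defined below:
#
#   use DateTime;
#   use DateTime::Event::Predict;
#
#   my $dtp = DateTime::Event::Predict->new( profile => 'default' );
#
#   # Feed it a series of observed dates (e.g. four consecutive Mondays)...
#   $dtp->add_date( DateTime->new( year => 2009, month => 11, day => $_ ) )
#       for 2, 9, 16, 23;
#
#   # ...then ask for the most likely next date
#   my $prediction = $dtp->predict();   # a single DateTime in scalar context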

package DateTime::Event::Predict;

use 5.006;
use strict;

use DateTime;
use DateTime::Duration; # Loaded by DateTime, but used directly below, so be explicit
use Params::Validate qw(:all);
use Carp qw(carp croak confess);
use Scalar::Util;
use Data::Dumper;

use POSIX qw(ceil);

use DateTime::Event::Predict::Profile qw(:buckets);

our $VERSION = '0.01_03';


#===============================================================================#

sub new {
    my $proto = shift;

    my %opts = validate(@_, {
        dates   => { type => ARRAYREF, optional => 1 },
        profile => { type => SCALAR | OBJECT | HASHREF, optional => 1 },
        #stdev_limit => { type => SCALAR, default => 2 },
    });

    my $class = ref( $proto ) || $proto;
    my $self = { # Will need to allow for params passed to constructor
        dates                   => [],
        distinct_buckets        => {},
        interval_buckets        => {},
        total_epoch_interval    => 0,
        largest_epoch_interval  => 0,
        smallest_epoch_interval => 0,
        mean_epoch_interval     => 0,

        # Whether this data set has been trained or not
        trained => 0,
    };
    bless($self, $class);

    $opts{profile} = 'default' if ! $opts{profile};

    $self->profile( $opts{profile} );

    return $self;
}

# Get or set the list of dates
# ***NOTE: Should make this validate for 'can' on the DateTime methods we need and on 'isa' for DateTime
sub dates {
    my $self = shift;
    my ($dates) = @_;

    validate_pos(@_, { type => ARRAYREF, optional => 1 });

    if (! defined $dates) {
        return wantarray ? @{$self->{dates}} : $self->{dates};
    }
    else {
        foreach my $date (@$dates) {
            $self->_trim_date( $date );
            $self->add_date($date);
        }
    }

    return 1;
}

# Add a date to the list of dates
sub add_date {
    my $self = shift;
    my ($date) = @_;

    validate_pos(@_, { isa => 'DateTime' }); # ***Or we could attempt to parse the date, or use can( epoch() )

    $self->_trim_date( $date );

    push(@{ $self->{dates} }, $date);

    return 1;
}

# Get or set the profile for this predictor
sub profile {
    my $self = shift;
    my ($profile) = @_; # $profile can be the name of a default profile, a DTP::Profile object, or a hashref of options for creating a new profile

    validate_pos(@_, { type => SCALAR | OBJECT | HASHREF, optional => 1 });

    # If no profile is provided, return the current profile
    if (! defined $profile || ! $profile) { return $self->{profile}; }

    my $new_profile;

    # Profile is an actual DTP::Profile object
    if (Scalar::Util::blessed($profile) && $profile->can('buckets')) {
        $new_profile = $profile;
    }
    # Profile is a hashref of options to create a new DTP::Profile object with
    elsif (ref($profile) eq 'HASH') {
        $new_profile = DateTime::Event::Predict::Profile->new(
            %$profile,
        );
    }
    # Profile is the name of a profile alias
    else {
        $new_profile = DateTime::Event::Predict::Profile->new( profile => $profile );
    }

    # Add the distinct buckets
    foreach my $bucket ( $new_profile->_distinct_buckets() ) {
        $self->{distinct_buckets}->{ $bucket->name } = {
            accessor => $bucket->accessor,
            duration => $bucket->duration,
            order    => $bucket->order,
            weight   => $bucket->weight,
            buckets  => {},
        };
    }

    # Add the interval buckets
    foreach my $bucket ( $new_profile->_interval_buckets() ) {
        $self->{interval_buckets}->{ $bucket->name } = {
            accessor => $bucket->accessor,
            order    => $bucket->order,
            weight   => $bucket->weight,
            buckets  => {},
        };
    }

    $self->{profile} = $new_profile;

    return 1;
}
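
# Illustrative forms for setting a profile. The string and object forms follow
# directly from the code above; the hashref keys are whatever
# DateTime::Event::Predict::Profile->new() accepts, so the 'buckets' key below
# is an assumption, not a documented name:
#
#   $dtp->profile('default');                          # named profile alias
#   $dtp->profile({ buckets => [ 'day_of_week' ] });   # hashref of Profile->new() options (hypothetical key)
#   $dtp->profile($profile_object);                    # an existing DTP::Profile object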

# Gather statistics about the dates
sub train {
    my $self = shift;

    # Sort the dates chronologically
    my @dates = sort { $a->hires_epoch() <=> $b->hires_epoch() } @{ $self->{dates} }; # *** Need to convert this to DateTime->compare($dt1, $dt2)

    # With fewer than two dates there are no intervals, and the mean-interval
    # division below would divide by zero
    croak "Can't train on fewer than two dates" if @dates < 2;

    # Last and first dates
    $self->{last_date}  = $dates[-1];
    $self->{first_date} = $dates[0];

    # Clear out anything already in the buckets, and reset the running epoch
    # interval total in case we're retraining
    foreach my $bucket (values %{$self->{distinct_buckets}}, values %{$self->{interval_buckets}}) {
        $bucket->{buckets} = {};
    }
    $self->{total_epoch_interval} = 0;

    my $prev_date;
    foreach my $index (0 .. $#dates) {
        # The date to work on
        my $date = $dates[ $index ];

        # Get which dates were before and after the date we're working on
        my ($before, $after);
        if ($index > 0)       { $before = $dates[ $index - 1 ]; }
        if ($index < $#dates) { $after  = $dates[ $index + 1 ]; }

        # Increment the date-part buckets
        while (my ($name, $dbucket) = each %{ $self->{distinct_buckets} }) {
            # Get the accessor method by using can()
            my $cref = $date->can( $dbucket->{accessor} );
            croak "Can't call accessor '" . $dbucket->{accessor} . "' on " . ref($date) . " object" unless $cref;

            # Increment the number of instances for the value given when we use this bucket's accessor on $date
            $dbucket->{buckets}->{ &$cref($date) }++;
        }

        # If this is the first date we have nothing to diff against, so skip on to the next one
        if (! $prev_date) { $prev_date = $date; next; }

        # Get a DateTime::Duration object representing the diff between the dates
        my $dur = $date->subtract_datetime( $prev_date );

        # Increment the interval buckets
        # Intervals: here we default to the largest interval that we can see. So, for instance, if
        # there is a difference of months we will not increment anything smaller than that.
        while (my ($name, $bucket) = each %{ $self->{interval_buckets} }) {
            my $cref = $dur->can( $bucket->{accessor} );
            croak "Can't call accessor '" . $bucket->{accessor} . "' on " . ref($dur) . " object" unless $cref;
            my $interval = &$cref($dur);
            $bucket->{buckets}->{ $interval }++;
        }

        # Add the difference between the dates in epoch seconds
        my $epoch_interval = $date->hires_epoch() - $prev_date->hires_epoch();

        ### Epoch interval: $epoch_interval

        $self->{total_epoch_interval} += $epoch_interval;

        # Set the current date to this date
        $prev_date = $date;
    }

    # Average interval between dates in epoch seconds: divide the total interval by the number of intervals
    $self->{mean_epoch_interval} = $self->{total_epoch_interval} / (scalar @dates - 1);

    # Mark this object as being trained
    $self->{trained}++;
}
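
# Worked example (illustrative): training on four dates falling on consecutive
# Mondays would fill the buckets roughly like this, assuming a 'day_of_week'
# distinct bucket and a 'weeks' interval bucket are enabled in the profile:
#
#   distinct 'day_of_week' buckets: { 1 => 4 }   # all four dates fell on Monday (1)
#   interval 'weeks' buckets:       { 1 => 3 }   # three gaps, each one week long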

sub predict {
    my $self = shift;

    my %opts = validate(@_, {
        max_predictions => { type => SCALAR, optional => 1 },      # How many predictions to return
        stdev_limit     => { type => SCALAR, default => 2 },       # Number of standard deviations to search through, defaults to 2
        min_date        => { isa => 'DateTime', optional => 1 },   # If set, make no prediction before 'min_date'
        callbacks       => { type => ARRAYREF, optional => 1 },    # Arrayref of coderefs to call when making predictions
    });

    # Force max predictions to one if we were called in scalar context
    if (! defined $opts{'max_predictions'}) {
        $opts{'max_predictions'} = 1 if ! wantarray;
    }

    # Train this set of dates if they're not already trained
    $self->train if ! $self->_is_trained;

    # Make a copy of the distinct and interval bucket hashes so we can mess with them
    my %distinct_buckets = %{ $self->{distinct_buckets} };
    my %interval_buckets = %{ $self->{interval_buckets} };

    # Figure the mean, variance, and standard deviation for each bucket
    foreach my $bucket (values %distinct_buckets, values %interval_buckets) {
        my ($mean, $variance, $stdev) = $self->_bucket_statistics($bucket);

        $bucket->{mean}     = $mean;
        $bucket->{variance} = $variance;
        $bucket->{stdev}    = $stdev;
    }

    # Get the most recent of the provided dates by sorting them by their epoch seconds
    my $most_recent_date = (sort { $b->hires_epoch() <=> $a->hires_epoch() } @{ $self->{dates} })[0];

    # Make a starting search date that has been moved ahead by the average interval between dates (in epoch seconds)
    my $duration = DateTime::Duration->new(
        seconds => $self->{mean_epoch_interval}, # ** Might need to round off hires second info here?
    );
    my $start_date = $most_recent_date + $duration;

    # A hash of predictions; dates are keyed by their hires_epoch() value
    my %predictions = ();

    # Start with using the distinct buckets to make predictions
    if (%distinct_buckets) {
        # Get a list of bucket names sorted from largest date part to smallest (i.e. year -> month -> day -> hour ... microsecond, etc.)
        my @distinct_bucket_keys = sort { $self->{distinct_buckets}->{ $b }->{order} <=> $self->{distinct_buckets}->{ $a }->{order} } keys %distinct_buckets;

        # Get the first bucket name
        my $first_bucket_name = shift @distinct_bucket_keys;

        # Start recursively descending down into the various date parts, searching in each one
        $self->_date_descend_distinct(
            %opts,

            date                 => $start_date,
            most_recent_date     => $most_recent_date,
            bucket_name          => $first_bucket_name,
            distinct_buckets     => \%distinct_buckets,
            distinct_bucket_keys => \@distinct_bucket_keys,
            predictions          => \%predictions,
        );

        # Now that we (hopefully) have some predictions, put each one through _interval_check to check
        # the prediction against the interval bucket statistics
        if (%interval_buckets) {
            while (my ($hires, $prediction) = each %predictions) {
                # Delete the date from the predictions hash if it's not good according to the interval statistics
                if (! $self->_interval_check(
                    %opts,
                    date             => $prediction,
                    most_recent_date => $most_recent_date,
                    interval_buckets => \%interval_buckets,
                )) {
                    delete $predictions{ $hires };
                }
            }
        }
    }
    # No distinct buckets, just interval buckets
    elsif (%interval_buckets) {
        # Get a list of bucket names sorted from largest interval to smallest (i.e. years -> months -> days -> hours, etc.)
        my @interval_bucket_keys = sort { $self->{interval_buckets}->{ $b }->{order} <=> $self->{interval_buckets}->{ $a }->{order} } keys %interval_buckets;

        # Get the first bucket name
        my $first_bucket_name = shift @interval_bucket_keys;

        # Start recursively descending down into the date interval types, searching in each one
        $self->_date_descend_interval(
            %opts,

            date                 => $start_date,
            most_recent_date     => $most_recent_date,
            bucket_name          => $first_bucket_name,
            interval_buckets     => \%interval_buckets,
            interval_bucket_keys => \@interval_bucket_keys,
            predictions          => \%predictions,
        );
    }
    # WTF, no buckets. That's bad!
    else {
        croak("No buckets supplied!");
    }

    # Sort the predictions by their total deviation
    my @predictions = sort { $a->{_dtp_deviation} <=> $b->{_dtp_deviation} } values %predictions;

    return wantarray ? @predictions : $predictions[0];
}
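
# Calling-convention sketch (illustrative), following the wantarray handling above:
#
#   my $best = $dtp->predict();                          # scalar context: the single best prediction
#   my @all  = $dtp->predict( max_predictions => 10 );   # list context: up to ten, sorted best-first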

# Descend down into the distinct date parts, looking for predictions
sub _date_descend_distinct {
    my $self = shift;

    # Validate the options
    my %opts = validate(@_, {
        date                 => { isa => 'DateTime' },                 # The date to start searching in
        most_recent_date     => { isa => 'DateTime' },                 # The most recent date of the dates provided
        bucket_name          => { type => SCALAR },                    # The bucket (date part) to start searching in
        distinct_buckets     => { type => HASHREF },                   # A hashref of all buckets to use when looking for good predictions
        distinct_bucket_keys => { type => ARRAYREF },                  # A list of bucket names that we shift out of to get the next bucket to use
        stdev_limit          => { type => SCALAR },                    # The limit of how many standard deviations to search through
        predictions          => { type => HASHREF },                   # A hashref of predictions we find
        max_predictions      => { type => SCALAR, optional => 1 },     # The maximum number of predictions to return (prevents overly long searches)
        min_date             => { isa => 'DateTime', optional => 1 },  # If set, make no prediction before 'min_date'
        callbacks            => { type => ARRAYREF, optional => 1 },   # A list of custom coderefs that are called on each possible prediction
    });

    # Copy the options over into simple scalars so it's easier on my eyes
    my $date        = delete $opts{'date'};        # Delete these two as we'll be overwriting them below
    my $bucket_name = delete $opts{'bucket_name'};
    my $distinct_buckets     = $opts{'distinct_buckets'};
    my $distinct_bucket_keys = $opts{'distinct_bucket_keys'};
    my $stdev_limit          = $opts{'stdev_limit'};
    my $predictions          = $opts{'predictions'};
    my $max_predictions      = $opts{'max_predictions'};
    my $callbacks            = $opts{'callbacks'};

    # We've reached our max number of predictions, return
    return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;

    # Get the actual bucket hash for this bucket name
    my $bucket = $distinct_buckets->{ $bucket_name };

    # The search range is the standard deviation multiplied by the number of standard deviations to search through
    my $search_range = ceil( $bucket->{stdev} * $stdev_limit );

    # The next bucket to search down into
    my $next_bucket_name = "";
    if (scalar @$distinct_bucket_keys > 0) {
        $next_bucket_name = shift @$distinct_bucket_keys;
    }

    foreach my $search_inc ( 0 .. $search_range ) {
        # Make an inverted search increment so we can search backwards
        my $neg_search_inc = $search_inc * -1;

        # Put forwards and backwards in the searches
        my @searches = ($search_inc, $neg_search_inc);

        # Make sure we only search on 0 once (i.e. 0 * -1 == 0)
        @searches = (0) if $search_inc == 0;

        foreach my $increment (@searches) {
            # We've reached our max number of predictions, return
            return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;

            # Make a duration object using the duration unit for this bucket
            my $duration_increment = DateTime::Duration->new( $bucket->{duration} => $increment );

            # Get the new date
            my $new_date = $date + $duration_increment;

            # Trim the date down to just the date parts we care about
            $self->_trim_date( $new_date );

            # Skip this date if it's on or before the most recent date
            if (DateTime->compare( $new_date, $opts{'most_recent_date'} ) <= 0) {
                next;
            }

            # Skip this date if the "min_date" option is set and it's on or before that date
            if ($opts{'min_date'} && DateTime->compare($new_date, $opts{'min_date'}) <= 0) {
                next;
            }

            # If we have no more buckets to search into, determine if this date is a good prediction
            if (! $next_bucket_name) {
                if ($self->_distinct_check( %opts, date => $new_date )) {
                    $predictions->{ $new_date->hires_epoch() } = $new_date;
                }
            }
            # If we're not at the smallest bucket, keep searching!
            else {
                $self->_date_descend_distinct(
                    %opts,
                    date        => $new_date,
                    bucket_name => $next_bucket_name,
                );
            }
        }
    }

    return 1;
}
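
# Search-pattern sketch (illustrative): with a bucket standard deviation of 1
# and the default stdev_limit of 2, the search range is ceil(1 * 2) = 2, so
# the increments tried for that date part are 0, +1, -1, +2, -2 around the
# starting date.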

# Descend down into the date intervals, looking for predictions
sub _date_descend_interval {
    my $self = shift;

    # Validate the options
    my %opts = validate(@_, {
        date                 => { isa => 'DateTime' },                 # The date to start searching in
        most_recent_date     => { isa => 'DateTime' },                 # The most recent date of the dates provided
        bucket_name          => { type => SCALAR },                    # The bucket (interval type) to start searching in
        interval_buckets     => { type => HASHREF },                   # A hashref of all buckets to use when looking for good predictions
        interval_bucket_keys => { type => ARRAYREF },                  # A list of bucket names that we shift out of to get the next bucket to use
        stdev_limit          => { type => SCALAR },                    # The limit of how many standard deviations to search through
        predictions          => { type => HASHREF },                   # A hashref of predictions we find
        max_predictions      => { type => SCALAR, optional => 1 },     # The maximum number of predictions to return (prevents overly long searches)
        min_date             => { isa => 'DateTime', optional => 1 },  # If set, make no prediction before 'min_date'
        callbacks            => { type => ARRAYREF, optional => 1 },   # A list of custom coderefs that are called on each possible prediction
    });

    # Copy the options over into simple scalars so it's easier on my eyes
    my $date        = delete $opts{'date'};        # Delete these two as we'll be overwriting them below
    my $bucket_name = delete $opts{'bucket_name'};
    my $interval_buckets     = $opts{'interval_buckets'};
    my $interval_bucket_keys = $opts{'interval_bucket_keys'};
    my $stdev_limit          = $opts{'stdev_limit'};
    my $predictions          = $opts{'predictions'};
    my $max_predictions      = $opts{'max_predictions'};
    my $callbacks            = $opts{'callbacks'};

    # We've reached our max number of predictions, return
    return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;

    # Get the actual bucket hash for this bucket name
    my $bucket = $interval_buckets->{ $bucket_name };

    # The search range is the standard deviation multiplied by the number of standard deviations to search through
    my $search_range = ceil( $bucket->{stdev} * $stdev_limit );

    # The next bucket to search down into
    my $next_bucket_name = "";
    if (scalar @$interval_bucket_keys > 0) {
        $next_bucket_name = shift @$interval_bucket_keys;
    }

    foreach my $search_inc ( 0 .. $search_range ) {
        # Make an inverted search increment so we can search backwards
        my $neg_search_inc = $search_inc * -1;

        # Put forwards and backwards in the searches
        my @searches = ($search_inc, $neg_search_inc);

        # Make sure we only search on 0 once (i.e. 0 * -1 == 0)
        @searches = (0) if $search_inc == 0;

        foreach my $increment (@searches) {
            # We've reached our max number of predictions, return
            return 1 if defined $max_predictions && (scalar keys %$predictions) >= $max_predictions;

            # Make a duration object using the accessor for this bucket
            my $duration_increment = DateTime::Duration->new( $bucket->{accessor} => $increment );

            # Get the new date
            my $new_date = $date + $duration_increment;

            # Trim the date down to just the date parts we care about
            $self->_trim_date( $new_date );

            # Skip this date if it's on or before the most recent date
            if (DateTime->compare( $new_date, $opts{'most_recent_date'} ) <= 0) {
                next;
            }

            # Skip this date if the "min_date" option is set and it's on or before that date
            if ($opts{'min_date'} && DateTime->compare($new_date, $opts{'min_date'}) <= 0) {
                next;
            }

            # If we have no more buckets to search into, determine if this date is a good prediction
            if (! $next_bucket_name) {
                if ($self->_interval_check( %opts, date => $new_date )) {
                    $predictions->{ $new_date->hires_epoch() } = $new_date;
                }
            }
            # If we're not at the smallest bucket, keep searching!
            else {
                $self->_date_descend_interval(
                    %opts,
                    date        => $new_date,
                    bucket_name => $next_bucket_name,
                );
            }
        }
    }

    return 1;
}

# Check to see if a given date is good according to the supplied distinct buckets by going through each bucket
# and comparing this date's deviation from that bucket's mean. If it is within the standard deviation for
# each bucket then consider it a good match.
sub _distinct_check {
    my $self = shift;

    # Temporarily allow extra options
    validation_options( allow_extra => 1 );
    my %opts = validate(@_, {
        date             => { isa => 'DateTime' },                  # The date to check
        distinct_buckets => { type => HASHREF },                    # List of enabled buckets
        callbacks        => { type => ARRAYREF, optional => 1 },    # A list of custom coderefs that are called on each possible prediction
    });
    validation_options( allow_extra => 0 );

    my $date             = $opts{'date'};
    my $distinct_buckets = $opts{'distinct_buckets'};
    my $callbacks        = $opts{'callbacks'};

    my $good = 1;
    my $date_deviation = 0;
    foreach my $bucket (values %$distinct_buckets) {
        # Get the value of this bucket's accessor for the date
        my $cref = $date->can( $bucket->{accessor} );
        croak "Can't call accessor '" . $bucket->{accessor} . "' on " . ref($date) . " object" unless $cref;
        my $datepart_val = &$cref($date);

        # If the deviation of this date part from the bucket's mean exceeds the
        # bucket's standard deviation, this date is no good
        my $deviation = abs($datepart_val - $bucket->{mean});
        $date_deviation += $deviation;

        if ($deviation > $bucket->{stdev} ) {
            $good = 0;
            last;
        }
    }

    # All the date parts were within their standard deviations; record the deviation and run the callbacks
    if ($good == 1) {
        # Stick the date's total deviation into the object so it can be used for sorting in predict()
        $date->{_dtp_deviation} += $date_deviation;

        # Run each hook we were passed
        foreach my $callback (@{ $callbacks || [] }) {
            # If any hook returns false, this date is a no-go and we can stop processing it
            if (! &$callback($date)) {
                $good = 0;
                last;
            }
        }
    }

    # True only if the date passed both the bucket checks and the callbacks
    return $good ? 1 : 0;
}

# Check to see if a given date is good according to the supplied interval buckets by going through each bucket
# and comparing this date's deviation from that bucket's mean. If it is within the standard deviation for
# each bucket then consider it a good match.
sub _interval_check {
    my $self = shift;

    # Temporarily allow extra options
    validation_options( allow_extra => 1 );
    my %opts = validate(@_, {
        date             => { isa => 'DateTime' },                  # The date prediction to check
        most_recent_date => { isa => 'DateTime' },                  # The most recent date of the dates provided
        interval_buckets => { type => HASHREF },                    # List of enabled interval buckets
        callbacks        => { type => ARRAYREF, optional => 1 },    # A list of custom coderefs that are called on each possible prediction
    });
    validation_options( allow_extra => 0 );

    my $date             = $opts{'date'};
    my $most_recent_date = $opts{'most_recent_date'};
    my $interval_buckets = $opts{'interval_buckets'};
    my $callbacks        = $opts{'callbacks'};

    # Flag specifying whether the predicted date is "good" (within the standard deviation) or not
    my $good = 1;

    # Total deviation of the predicted date from each of the bucket standard deviations
    my $date_deviation = 0;

    # Get a duration object for the span between the most recent date supplied and the predicted date
    my $dur = $date->subtract_datetime( $most_recent_date );

    foreach my $bucket (values %$interval_buckets) {
        my $cref = $dur->can( $bucket->{accessor} );
        croak "Can't call accessor '" . $bucket->{accessor} . "' on " . ref($dur) . " object" unless $cref;
        my $interval = &$cref($dur);

        # If this interval's deviation from the bucket's mean exceeds the
        # bucket's standard deviation, the prediction is no good
        my $deviation = abs($interval - $bucket->{mean});
        $date_deviation += $deviation;

        if ($deviation > $bucket->{stdev} ) {
            $good = 0;
            last;
        }
    }

    # All the intervals were within their standard deviations; record the deviation and run the callbacks
    if ($good == 1) {
        # Stick the date's total deviation into the object so it can be used for sorting in predict()
        $date->{_dtp_deviation} += $date_deviation;

        # Run each hook we were passed
        foreach my $callback (@{ $callbacks || [] }) {
            # If any hook returns false, this date is a no-go and we can stop processing it
            if (! &$callback($date)) {
                $good = 0;
                last;
            }
        }
    }

    # True only if the prediction passed both the bucket checks and the callbacks
    return $good ? 1 : 0;
}

# Get the mean, variance, and standard deviation for a bucket
sub _bucket_statistics {
    my $self   = shift;
    my $bucket = shift;

    my $total = 0;
    my $count = 0;
    while (my ($value, $occurrences) = each %{ $bucket->{buckets} }) {
        # Count the value once for each time it occurred, incrementing the total by the value
        for (1 .. $occurrences) {
            $total += $value;
            $count++;
        }
    }

    my $mean = $total / $count;

    # Get the variance
    my $total_variance = 0;
    while (my ($value, $occurrences) = each %{ $bucket->{buckets} }) {
        # Weight each squared deviation by the number of times the value occurred
        my $this_variance = ($value - $mean) ** 2;

        $total_variance += $this_variance * $occurrences;
    }

    my $variance = $total_variance / $count;
    my $stdev    = sqrt($variance);

    return ($mean, $variance, $stdev);
}
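
# Worked example (illustrative): for a bucket histogram { 2 => 1, 4 => 3 }
# (the value 2 seen once, the value 4 seen three times):
#
#   mean     = (2 + 4 + 4 + 4) / 4                   = 3.5
#   variance = ((2 - 3.5)**2 + 3 * (4 - 3.5)**2) / 4 = 0.75
#   stdev    = sqrt(0.75)                            (about 0.866)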

# Whether this instance has been trained by train() or not
sub _is_trained {
    my $self = shift;

    return ($self->{trained} > 0) ? 1 : 0;
}

# Utility method to print out the dates added to this instance
sub _print_dates {
    my $self = shift;

    foreach my $date (sort { $a->hires_epoch() <=> $b->hires_epoch() } @{ $self->{dates} }) {
        print $date->mdy('/') . ' ' . $date->hms . "\n";
    }
}

# Trim off the date parts that are smaller than the smallest bucket we care about.
# For example, if we only care about the year, month, and day, the initial search
# can create an offset date whose hour or minute differs from the most recent given
# date; without trimming, a comparison against the given dates could treat that as
# a new date when it is really the same date with the hour or minute nudged forward.
sub _trim_dates {
    my $self = shift;
    my (@dates) = @_;

    # Get the smallest trimmable bucket we have turned on
    my ($smallest_bucket) = sort { $a->order <=> $b->order } grep { $_->on && $_->trimmable } $self->profile->buckets;

    return if ! defined $smallest_bucket;

    foreach my $date (@dates) {
        confess "Can't trim a non-DateTime value" unless $date->isa( 'DateTime' );

        # Truncate the date in place; callers depend on the passed DateTime
        # objects being modified
        $date->truncate( to => $smallest_bucket->accessor );
    }
}

# Useless syntactic sugar
sub _trim_date { return &_trim_dates(@_); }

1; # End of DateTime::Event::Predict

__END__