File Coverage

blib/lib/Statistics/OnLine.pm
Criterion Covered Total %
statement 44 48 91.6
branch 5 14 35.7
condition n/a
subroutine 10 12 83.3
pod 9 9 100.0
total 68 83 81.9


line stmt bran cond sub pod time code
1             # Copyright 2009 Francesco Nidito. All rights reserved.
2             #
3             # This library is free software; you can redistribute it and/or
4             # modify it under the same terms as Perl itself.
5              
6             package Statistics::OnLine;
7              
8 1     1   5258 use strict;
  1         3  
  1         42  
9              
10 1     1   6 use vars qw($VERSION);
  1         2  
  1         902  
11             $VERSION = '0.02';
12              
13             sub new {
14 1     1 1 77 my $class = shift;
15 1         10 return bless {
16             _count => 0,
17             _mean => 0,
18             _M2 => 0,
19             _M3 => 0,
20             _M4 => 0,
21             version => $VERSION,
22             }, $class;
23             }
24              
25             sub add_data {
26 11     11 1 50 my $self = shift;
27 11         16 foreach my $x (@_) { $self->_update_statistics($x); }
  33         50  
28 11         21 return $self;
29             }
30              
31             sub clean {
32 6     6 1 40 my ($self) = @_;
33 6         5 foreach my $i (grep /^_/, keys %{$self} ){ $self->{$i} = 0; }
  6         40  
  30         36  
34 6         20 return $self;
35             }
36              
37             # fast algorithm to update all the statistics at once:
38             # http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics
39             sub _update_statistics {
40 33     33   39 my ($self, $x) = @_;
41 33         35 my ($mean, $M2, $M3, $M4) = (0, 0, 0, 0);
42              
43 33         35 $self->{_count}++;
44 33         34 my $n = $self->{_count}; # shorter to write $n ;-)
45              
46             # $n**2 and $n**3 efficiently
47 33         33 my $n2 = $n*$n;
48 33         29 my $n3 = $n2*$n;
49              
50 33         44 my $delta = $x - $self->{_mean};
51              
52             # $delta**(2|3|4)... efficiently
53 33         26 my $delta2 = $delta*$delta;
54 33         28 my $delta3 = $delta2*$delta;
55 33         30 my $delta4 = $delta3*$delta;
56              
57 33         43 $mean = $self->{_mean} + $delta/$n;
58 33         42 $M2 = $self->{_M2} + $delta2*($n - 1)/$n;
59 33         70 $M3 = $self->{_M3} + $delta3*($n-1)*($n-2)/$n2 - 3*$delta*$self->{_M2}/$n;
60 33         79 $M4 = $self->{_M4} + $delta4*($n-1)*($n2-3*$n+3)/$n3 + 6*$delta2*$self->{_M2}/$n2 - 4*$delta*$self->{_M3}/$n;
61              
62 33         37 $self->{_mean} = $mean;
63 33         29 $self->{_M2} = $M2;
64 33         31 $self->{_M3} = $M3;
65 33         66 $self->{_M4} = $M4;
66             }
67              
68             sub count {
69 2     2 1 8 return $_[0]->{_count};
70             }
71              
72             sub mean {
73 1 50   1 1 7 die "too few elements to compute mean" if( $_[0]->{_count} == 0 );
74 1         6 return $_[0]->{_mean};
75             }
76              
77             sub variance {
78 0 0   0 1 0 die "too few elements to compute variance" if( $_[0]->{_count} < 2 );
79 0         0 return $_[0]->{_M2}/($_[0]->{_count} - 1);
80             }
81              
82             sub variance_n {
83 0 0   0 1 0 die "too few elements to compute variance_n" if( $_[0]->{_count} == 0 );
84 0         0 return $_[0]->{_M2}/$_[0]->{_count};
85             }
86              
87             sub skewness {
88 2 50   2 1 9 die "too few elements to compute skewness" if( $_[0]->{_count} == 0 );
89 2 50       5 die "variance is zero: cannot compute skewness" if( $_[0]->{_M2} == 0 );
90              
91 2         66 return sqrt( $_[0]->{_count} )*$_[0]->{_M3}/( $_[0]->{_M2}**(3/2));
92             }
93              
94             sub kurtosis {
95 2 50   2 1 7 die "too few elements to compute kurtosis" if( $_[0]->{_count} < 4 );
96 2 50       5 die "variance is zero: cannot compute kurtosis" if( $_[0]->{_M2} == 0 );
97              
98 2         7 return $_[0]->{_count}*$_[0]->{_M4}/($_[0]->{_M2}*$_[0]->{_M2}) - 3;
99             }
100              
101             1;
102              
103             __END__