File Coverage

blib/lib/Dumbbench/Stats.pm
Criterion Covered Total %
statement 56 72 77.7
branch 5 6 83.3
condition 2 2 100.0
subroutine 16 20 80.0
pod 0 15 0.0
total 79 115 68.7


line stmt bran cond sub pod time code
1             package Dumbbench::Stats;
2 3     3   20 use strict;
  3         7  
  3         105  
3 3     3   17 use warnings;
  3         4  
  3         74  
4 3     3   15 use List::Util ();
  3         5  
  3         36  
5 3     3   1439 use Statistics::CaseResampling ();
  3         1961  
  3         122  
6              
7             use Class::XSAccessor {
8 3         20 constructor => 'new',
9             accessors => [qw/data name/],
10 3     3   19 };
  3         7  
11              
12             # Note: This is entirely unoptimized. There is a lot of unnecessary
13             # stuff going on. This is to allow the user to modify the data
14             # set in flight. If this comes back to haunt us at some point,
15             # we can still optimize, but at this point, convenience still wins.
16              
17             sub sorted_data {
18 0     0 0 0 my $self = shift;
19 0         0 my $sorted = [sort { $a <=> $b } @{$self->data}];
  0         0  
  0         0  
20 0         0 return $sorted;
21             }
22              
23 2     2 0 52 sub first_quartile { Statistics::CaseResampling::first_quartile($_[0]->data) }
24 2     2 0 1307 sub second_quartile { return $_[0]->median }
25 2     2 0 23 sub third_quartile { Statistics::CaseResampling::third_quartile($_[0]->data) }
26              
27              
28 17     17 0 23 sub n { scalar(@{$_[0]->data}) }
  17         53  
29              
30             sub sum {
31 17     17 0 21 my $self = shift;
32 17         22 return List::Util::sum(@{$self->data});
  17         72  
33             }
34              
35             sub min {
36 0     0 0 0 my $self = shift;
37 0         0 return List::Util::min(@{$self->data});
  0         0  
38             }
39              
40             sub max {
41 0     0 0 0 my $self = shift;
42 0         0 return List::Util::max(@{$self->data});
  0         0  
43             }
44              
45             sub mean {
46 17     17 0 2482 my $self = shift;
47 17         30 return $self->sum / $self->n;
48             }
49              
50 73     73 0 257 sub median { Statistics::CaseResampling::median($_[0]->data) } # O(n)!
51              
52             sub median_confidence_limits {
53 0     0 0 0 my $self = shift;
54 0         0 my $nsigma = shift;
55 0         0 my $alpha = Statistics::CaseResampling::nsigma_to_alpha($nsigma);
56             # note: The 1000 here is kind of a lower limit for reasonable accuracy.
57             # But if the data set is small, that's more significant. If the data
58             # set is VERY large, then running much more than 1k resamplings
59             # is VERY expensive. So 1k is probably a reasonable default.
60 0         0 return Statistics::CaseResampling::median_simple_confidence_limits($self->data, 1-$alpha, 1000)
61             }
62              
63             sub mad {
64 26     26 0 45 my $self = shift;
65 26         37 my $median = $self->median;
66 26         30 my @val = map {abs($_ - $median)} @{$self->data};
  114         172  
  26         42  
67 26         81 return ref($self)->new(data => \@val)->median;
68             }
69              
70             sub mad_dev {
71 1     1 0 4 my $self = shift;
72 1         2 return $self->mad()*1.4826;
73             }
74              
75             sub std_dev {
76 2     2 0 23 my $self = shift;
77 2         4 my $data = $self->data;
78 2         20 my $mean = $self->mean;
79 2         3 my $var = 0;
80 2         42 $var += ($_-$mean)**2 for @$data;
81 2         5 $var /= @$data - 1;
82 2         9 return sqrt($var);
83             }
84              
85             sub filter_outliers {
86 14     14 0 1173 my $self = shift;
87 14         39 my %opt = @_;
88 14   100     38 my $var_measure = $opt{variability_measure} || 'mad';
89 14         18 my $n_sigma = $opt{nsigma_outliers};
90              
91             # If outlier rejection is turned off...
92 14 100       47 if (not $n_sigma) {
    50          
93 1         8 return ($self->data, []);
94             }
95             elsif ($n_sigma < 0) {
96 0         0 Carp::croak("A negative value for the number of 'sigmas' makes no sense");
97             }
98              
99 13         26 my $data = $self->data;
100              
101 13         20 my $median = $self->median;
102 13         38 my $variability = $self->$var_measure;
103 13         35 my @good;
104             my @outliers;
105 13         23 foreach my $x (@$data) {
106 55 100       90 if (abs($x-$median) <= $variability*$n_sigma) {
107 43         64 push @good, $x;
108             }
109             else {
110 12         20 push @outliers, $x;
111             }
112             }
113              
114 13         43 return(\@good, \@outliers);
115             }
116              
117              
118             1;