File Coverage

blib/lib/Dumbbench/Stats.pm
Criterion Covered Total %
statement 56 72 77.7
branch 5 6 83.3
condition 2 2 100.0
subroutine 16 20 80.0
pod 0 15 0.0
total 79 115 68.7


line stmt bran cond sub pod time code
1             package Dumbbench::Stats;
2 3     3   17 use strict;
  3         6  
  3         71  
3 3     3   12 use warnings;
  3         5  
  3         57  
4 3     3   12 use List::Util ();
  3         5  
  3         33  
5 3     3   1228 use Statistics::CaseResampling ();
  3         1562  
  3         99  
6              
7             use Class::XSAccessor {
8 3         20 constructor => 'new',
9             accessors => [qw/data name/],
10 3     3   20 };
  3         4  
11              
12             # Note: This is entirely unoptimized. There is a lot of unnecessary
13             # stuff going on. This is to allow the user to modify the data
14             # set in flight. If this comes back to haunt us at some point,
15             # we can still optimize, but at this point, convenience still wins.
16              
17             sub sorted_data {
18 0     0 0 0 my $self = shift;
19 0         0 my $sorted = [sort { $a <=> $b } @{$self->data}];
  0         0  
  0         0  
20 0         0 return $sorted;
21             }
22              
23 2     2 0 14 sub first_quartile { Statistics::CaseResampling::first_quartile($_[0]->data) }
24 2     2 0 470 sub second_quartile { return $_[0]->median }
25 2     2 0 18 sub third_quartile { Statistics::CaseResampling::third_quartile($_[0]->data) }
26              
27              
28 16     16 0 23 sub n { scalar(@{$_[0]->data}) }
  16         51  
29              
30             sub sum {
31 16     16 0 19 my $self = shift;
32 16         30 return List::Util::sum(@{$self->data});
  16         71  
33             }
34              
35             sub min {
36 0     0 0 0 my $self = shift;
37 0         0 return List::Util::min(@{$self->data});
  0         0  
38             }
39              
40             sub max {
41 0     0 0 0 my $self = shift;
42 0         0 return List::Util::max(@{$self->data});
  0         0  
43             }
44              
45             sub mean {
46 16     16 0 1429 my $self = shift;
47 16         34 return $self->sum / $self->n;
48             }
49              
50 68     68 0 255 sub median { Statistics::CaseResampling::median($_[0]->data) } # O(n)!
51              
52             sub median_confidence_limits {
53 0     0 0 0 my $self = shift;
54 0         0 my $nsigma = shift;
55 0         0 my $alpha = Statistics::CaseResampling::nsigma_to_alpha($nsigma);
56             # note: The 1000 here is kind of a lower limit for reasonable accuracy.
57             # But if the data set is small, that's more significant. If the data
58             # set is VERY large, then running much more than 1k resamplings
59             # is VERY expensive. So 1k is probably a reasonable default.
60 0         0 return Statistics::CaseResampling::median_simple_confidence_limits($self->data, 1-$alpha, 1000)
61             }
62              
63             sub mad {
64 24     24 0 47 my $self = shift;
65 24         41 my $median = $self->median;
66 24         37 my @val = map {abs($_ - $median)} @{$self->data};
  103         190  
  24         48  
67 24         89 return ref($self)->new(data => \@val)->median;
68             }
69              
70             sub mad_dev {
71 1     1 0 3 my $self = shift;
72 1         3 return $self->mad()*1.4826;
73             }
74              
75             sub std_dev {
76 2     2 0 20 my $self = shift;
77 2         6 my $data = $self->data;
78 2         4 my $mean = $self->mean;
79 2         6 my $var = 0;
80 2         10 $var += ($_-$mean)**2 for @$data;
81 2         4 $var /= @$data - 1;
82 2         6 return sqrt($var);
83             }
84              
85             sub filter_outliers {
86 13     13 0 1002 my $self = shift;
87 13         37 my %opt = @_;
88 13   100     44 my $var_measure = $opt{variability_measure} || 'mad';
89 13         21 my $n_sigma = $opt{nsigma_outliers};
90              
91             # If outlier rejection is turned off...
92 13 100       41 if (not $n_sigma) {
    50          
93 1         7 return ($self->data, []);
94             }
95             elsif ($n_sigma < 0) {
96 0         0 Carp::croak("A negative value for the number of 'sigmas' makes no sense");
97             }
98              
99 12         28 my $data = $self->data;
100              
101 12         23 my $median = $self->median;
102 12         27 my $variability = $self->$var_measure;
103 12         31 my @good;
104             my @outliers;
105 12         25 foreach my $x (@$data) {
106 48 100       102 if (abs($x-$median) <= $variability*$n_sigma) {
107 39         57 push @good, $x;
108             }
109             else {
110 9         16 push @outliers, $x;
111             }
112             }
113              
114 12         41 return(\@good, \@outliers);
115             }
116              
117              
118             1;