File Coverage

blib/lib/Dumbbench/Stats.pm
Criterion Covered Total %
statement 56 72 77.7
branch 5 6 83.3
condition 2 2 100.0
subroutine 16 20 80.0
pod 0 15 0.0
total 79 115 68.7


line stmt bran cond sub pod time code
1             package Dumbbench::Stats;
2 3     3   19 use strict;
  3         5  
  3         80  
3 3     3   13 use warnings;
  3         5  
  3         82  
4 3     3   12 use List::Util ();
  3         5  
  3         40  
5 3     3   1045 use Statistics::CaseResampling ();
  3         1718  
  3         87  
6              
7             use Class::XSAccessor {
8 3         24 constructor => 'new',
9             accessors => [qw/data name/],
10 3     3   17 };
  3         5  
11              
12             # Note: This is entirely unoptimized. There is a lot of unnecessary
13             # stuff going on. This is to allow the user to modify the data
14             # set in flight. If this comes back to haunt us at some point,
15             # we can still optimize, but at this point, convenience still wins.
16              
17             sub sorted_data {
18 0     0 0 0 my $self = shift;
19 0         0 my $sorted = [sort { $a <=> $b } @{$self->data}];
  0         0  
  0         0  
20 0         0 return $sorted;
21             }
22              
23 2     2 0 15 sub first_quartile { Statistics::CaseResampling::first_quartile($_[0]->data) }
24 2     2 0 935 sub second_quartile { return $_[0]->median }
25 2     2 0 14 sub third_quartile { Statistics::CaseResampling::third_quartile($_[0]->data) }
26              
27              
28 17     17 0 21 sub n { scalar(@{$_[0]->data}) }
  17         49  
29              
30             sub sum {
31 17     17 0 20 my $self = shift;
32 17         20 return List::Util::sum(@{$self->data});
  17         76  
33             }
34              
35             sub min {
36 0     0 0 0 my $self = shift;
37 0         0 return List::Util::min(@{$self->data});
  0         0  
38             }
39              
40             sub max {
41 0     0 0 0 my $self = shift;
42 0         0 return List::Util::max(@{$self->data});
  0         0  
43             }
44              
45             sub mean {
46 17     17 0 1764 my $self = shift;
47 17         37 return $self->sum / $self->n;
48             }
49              
50 73     73 0 234 sub median { Statistics::CaseResampling::median($_[0]->data) } # O(n)!
51              
52             sub median_confidence_limits {
53 0     0 0 0 my $self = shift;
54 0         0 my $nsigma = shift;
55 0         0 my $alpha = Statistics::CaseResampling::nsigma_to_alpha($nsigma);
56             # note: The 1000 here is kind of a lower limit for reasonable accuracy.
57             # But if the data set is small, that's more significant. If the data
58             # set is VERY large, then running much more than 1k resamplings
59             # is VERY expensive. So 1k is probably a reasonable default.
60 0         0 return Statistics::CaseResampling::median_simple_confidence_limits($self->data, 1-$alpha, 1000)
61             }
62              
63             sub mad {
64 26     26 0 37 my $self = shift;
65 26         34 my $median = $self->median;
66 26         30 my @val = map {abs($_ - $median)} @{$self->data};
  114         150  
  26         44  
67 26         74 return ref($self)->new(data => \@val)->median;
68             }
69              
70             sub mad_dev {
71 1     1 0 2 my $self = shift;
72 1         2 return $self->mad()*1.4826;
73             }
74              
75             sub std_dev {
76 2     2 0 16 my $self = shift;
77 2         5 my $data = $self->data;
78 2         3 my $mean = $self->mean;
79 2         3 my $var = 0;
80 2         7 $var += ($_-$mean)**2 for @$data;
81 2         4 $var /= @$data - 1;
82 2         4 return sqrt($var);
83             }
84              
85             sub filter_outliers {
86 14     14 0 822 my $self = shift;
87 14         38 my %opt = @_;
88 14   100     39 my $var_measure = $opt{variability_measure} || 'mad';
89 14         17 my $n_sigma = $opt{nsigma_outliers};
90              
91             # If outlier rejection is turned off...
92 14 100       39 if (not $n_sigma) {
    50          
93 1         4 return ($self->data, []);
94             }
95             elsif ($n_sigma < 0) {
96 0         0 Carp::croak("A negative value for the number of 'sigmas' makes no sense");
97             }
98              
99 13         27 my $data = $self->data;
100              
101 13         30 my $median = $self->median;
102 13         33 my $variability = $self->$var_measure;
103 13         38 my @good;
104             my @outliers;
105 13         23 foreach my $x (@$data) {
106 54 100       86 if (abs($x-$median) <= $variability*$n_sigma) {
107 44         56 push @good, $x;
108             }
109             else {
110 10         17 push @outliers, $x;
111             }
112             }
113              
114 13         35 return(\@good, \@outliers);
115             }
116              
117              
118             1;