File Coverage

blib/lib/Statistics/ChiSquare.pm
Criterion Covered Total %
statement 24 26 92.3
branch 5 8 62.5
condition 1 3 33.3
subroutine 3 3 100.0
pod 1 1 100.0
total 34 41 82.9


line stmt bran cond sub pod time code
1             package Statistics::ChiSquare;
2              
3             # ChiSquare.pm
4             #
5             # Jon Orwant, orwant@media.mit.edu
6             # David Cantrell, david@cantrell.org.uk
7             #
8             # 31 Oct 95, revised Mon Oct 18 12:16:47 1999, and again November 2001
9             # to fix an off-by-one error
10             #
11             # Nov 2003, revised to support a larger table
12             #
13             # Copyright 1995, 1999, 2001 Jon Orwant. All rights reserved.
14             # This program is free software; you can redistribute it and/or
15             # modify it under the same terms as Perl itself.
16             #
17             # some sections Copyright 2003 David Cantrell
18              
19 1     1   5814 use strict;
  1         3  
  1         43  
20 1     1   5 use vars qw($VERSION @ISA @EXPORT);
  1         2  
  1         682  
21              
22             require Exporter;
23              
24             @ISA = qw(Exporter);
25             @EXPORT = qw(chisquare);
26              
27             $VERSION = '0.6';
28              
29             my @chilevels = ();
30             my @chitable = ();
31              
32             $chilevels[$_] = [100, 99, 95, 90, 70, 50, 30, 10, 5, 1] foreach(1..20);
33             # JONO's data 99% 95% 90% 70% 50% 30% 10% 5% 1%
34             $chitable[1] = [ 0.00016, 0.0039, 0.016, 0.15, 0.46, 1.07, 2.71, 3.84, 6.64];
35             $chitable[2] = [ 0.020, 0.10, 0.21, 0.71, 1.39, 2.41, 4.60, 5.99, 9.21];
36             $chitable[3] = [ 0.12, 0.35, 0.58, 1.42, 2.37, 3.67, 6.25, 7.82, 11.34];
37             $chitable[4] = [ 0.30, 0.71, 1.06, 2.20, 3.36, 4.88, 7.78, 9.49, 13.28];
38             $chitable[5] = [ 0.55, 1.14, 1.61, 3.00, 4.35, 6.06, 9.24, 11.07, 15.09];
39             $chitable[6] = [ 0.87, 1.64, 2.20, 3.83, 5.35, 7.23, 10.65, 12.59, 16.81];
40             $chitable[7] = [ 1.24, 2.17, 2.83, 4.67, 6.35, 8.38, 12.02, 14.07, 18.48];
41             $chitable[8] = [ 1.65, 2.73, 3.49, 5.53, 7.34, 9.52, 13.36, 15.51, 20.09];
42             $chitable[9] = [ 2.09, 3.33, 4.17, 6.39, 8.34, 10.66, 14.68, 16.92, 21.67];
43             $chitable[10] = [ 2.56, 3.94, 4.86, 7.27, 9.34, 11.78, 15.99, 18.31, 23.21];
44             $chitable[11] = [ 3.05, 4.58, 5.58, 8.15, 10.34, 12.90, 17.28, 19.68, 24.73];
45             $chitable[12] = [ 3.57, 5.23, 6.30, 9.03, 11.34, 14.01, 18.55, 21.03, 26.22];
46             $chitable[13] = [ 4.11, 5.89, 7.04, 9.93, 12.34, 15.12, 19.81, 22.36, 27.69];
47             $chitable[14] = [ 4.66, 6.57, 7.79, 10.82, 13.34, 16.22, 21.06, 23.69, 29.14];
48             $chitable[15] = [ 5.23, 7.26, 8.55, 11.72, 14.34, 17.32, 22.31, 25.00, 30.58];
49             $chitable[16] = [ 5.81, 7.96, 9.31, 12.62, 15.34, 18.42, 23.54, 26.30, 32.00];
50             $chitable[17] = [ 6.41, 8.67, 10.09, 13.53, 16.34, 19.51, 24.77, 27.59, 33.41];
51             $chitable[18] = [ 7.00, 9.39, 10.87, 14.44, 17.34, 20.60, 25.99, 28.87, 34.81];
52             $chitable[19] = [ 7.63, 10.12, 11.65, 15.35, 18.34, 21.69, 27.20, 30.14, 36.19];
53             $chitable[20] = [ 8.26, 10.85, 12.44, 16.27, 19.34, 22.78, 28.41, 31.41, 37.57];
54              
55             $chilevels[$_] = [100, 99, 95, 90, 75, 50, 25, 10, 5, 1] foreach(21..30);
56             # DCANTRELL's data 99% 95% 90% 75% 50% 25% 10% 5% 1%
57             $chitable[21] = [ 8.90, 11.59, 13.24, 16.34, 20.34, 24.93, 29.62, 32.67, 38.93];
58             $chitable[22] = [ 9.54, 12.34, 14.04, 17.24, 21.34, 26.04, 30.81, 33.92, 40.29];
59             $chitable[23] = [10.20, 13.09, 14.85, 18.14, 22.34, 27.14, 32.01, 35.17, 41.64];
60             $chitable[24] = [10.86, 13.85, 15.66, 19.04, 23.34, 28.24, 33.20, 36.42, 42.98];
61             $chitable[25] = [11.52, 14.61, 16.47, 19.94, 24.34, 39.34, 34.38, 37.65, 44.31];
62             $chitable[26] = [12.20, 15.38, 17.29, 20.84, 25.34, 30.43, 35.56, 38.89, 45.64];
63             $chitable[27] = [12.87, 16.15, 18.11, 21.75, 26.34, 31.53, 36.74, 40.11, 46.96];
64             $chitable[28] = [13.56, 16.93, 18.94, 22.66, 27.34, 32.62, 37.92, 41.34, 48.28];
65             $chitable[29] = [14.26, 17.71, 19.77, 23.57, 28.34, 33.71, 39.09, 42.56, 49.59];
66             $chitable[30] = [14.95, 18.49, 20.60, 24.48, 29.34, 34.80, 40.26, 43.77, 50.89];
67              
68             # assume the expected probability distribution is uniform
69             sub chisquare {
70 3     3 1 182 my @data = @_;
71 3 50 33     32 @data = @{$data[0]} if @data == 1 and ref($data[0]);
  0         0  
72 3 50       9 return "There's no data!" unless @data;
73            
74 3         6 my $degrees_of_freedom = scalar(@data) - 1;
75 3         8 my ($chisquare, $num_samples, $expected, $i) = (0, 0, 0, 0);
76 3 50       9 if (! ref($chitable[$degrees_of_freedom])) {
77 0         0 return "I can't handle ".scalar(@data)." choices without a better table.";
78             }
79 3         8 foreach (@data) { $num_samples += $_ }
  58         414  
80 3         8 $expected = $num_samples / scalar(@data);
81             # return "There's no data!" unless $expected;
82 3         6 foreach (@data) {
83 58         91 $chisquare += (($_ - $expected) ** 2) / $expected;
84             }
85 3         4 foreach (@{$chitable[$degrees_of_freedom]}) {
  3         9  
86 11 100       101 if ($chisquare < $_) {
87 2         84 return "There's a >".$chilevels[$degrees_of_freedom]->[$i+1]."% chance, ".
88             "and a <".$chilevels[$degrees_of_freedom]->[$i]."% chance, that this data is random.";
89             }
90 9         15 $i++;
91             }
92 1         3 return "There's a <".(@{$chilevels[$degrees_of_freedom]})[-1]."% chance that this data is random.";
  1         10  
93             }
94              
95             1;
96             __END__