line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::ChiSquare; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ChiSquare.pm |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Jon Orwant, orwant@media.mit.edu |
6
|
|
|
|
|
|
|
# David Cantrell, david@cantrell.org.uk |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# 31 Oct 95, revised Mon Oct 18 12:16:47 1999, and again November 2001 |
9
|
|
|
|
|
|
|
# to fix an off-by-one error |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Nov 2003, revised to support a larger table |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
# Copyright 1995, 1999, 2001 Jon Orwant. All rights reserved. |
14
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
15
|
|
|
|
|
|
|
# modify it under the same terms as Perl itself. |
16
|
|
|
|
|
|
|
# |
17
|
|
|
|
|
|
|
# some sections Copyright 2003 David Cantrell |
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
5814
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
43
|
|
20
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION @ISA @EXPORT); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
682
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
require Exporter; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
25
|
|
|
|
|
|
|
@EXPORT = qw(chisquare); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
$VERSION = '0.6'; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
my @chilevels = (); |
30
|
|
|
|
|
|
|
my @chitable = (); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$chilevels[$_] = [100, 99, 95, 90, 70, 50, 30, 10, 5, 1] foreach(1..20); |
33
|
|
|
|
|
|
|
# JONO's data 99% 95% 90% 70% 50% 30% 10% 5% 1% |
34
|
|
|
|
|
|
|
$chitable[1] = [ 0.00016, 0.0039, 0.016, 0.15, 0.46, 1.07, 2.71, 3.84, 6.64]; |
35
|
|
|
|
|
|
|
$chitable[2] = [ 0.020, 0.10, 0.21, 0.71, 1.39, 2.41, 4.60, 5.99, 9.21]; |
36
|
|
|
|
|
|
|
$chitable[3] = [ 0.12, 0.35, 0.58, 1.42, 2.37, 3.67, 6.25, 7.82, 11.34]; |
37
|
|
|
|
|
|
|
$chitable[4] = [ 0.30, 0.71, 1.06, 2.20, 3.36, 4.88, 7.78, 9.49, 13.28]; |
38
|
|
|
|
|
|
|
$chitable[5] = [ 0.55, 1.14, 1.61, 3.00, 4.35, 6.06, 9.24, 11.07, 15.09]; |
39
|
|
|
|
|
|
|
$chitable[6] = [ 0.87, 1.64, 2.20, 3.83, 5.35, 7.23, 10.65, 12.59, 16.81]; |
40
|
|
|
|
|
|
|
$chitable[7] = [ 1.24, 2.17, 2.83, 4.67, 6.35, 8.38, 12.02, 14.07, 18.48]; |
41
|
|
|
|
|
|
|
$chitable[8] = [ 1.65, 2.73, 3.49, 5.53, 7.34, 9.52, 13.36, 15.51, 20.09]; |
42
|
|
|
|
|
|
|
$chitable[9] = [ 2.09, 3.33, 4.17, 6.39, 8.34, 10.66, 14.68, 16.92, 21.67]; |
43
|
|
|
|
|
|
|
$chitable[10] = [ 2.56, 3.94, 4.86, 7.27, 9.34, 11.78, 15.99, 18.31, 23.21]; |
44
|
|
|
|
|
|
|
$chitable[11] = [ 3.05, 4.58, 5.58, 8.15, 10.34, 12.90, 17.28, 19.68, 24.73]; |
45
|
|
|
|
|
|
|
$chitable[12] = [ 3.57, 5.23, 6.30, 9.03, 11.34, 14.01, 18.55, 21.03, 26.22]; |
46
|
|
|
|
|
|
|
$chitable[13] = [ 4.11, 5.89, 7.04, 9.93, 12.34, 15.12, 19.81, 22.36, 27.69]; |
47
|
|
|
|
|
|
|
$chitable[14] = [ 4.66, 6.57, 7.79, 10.82, 13.34, 16.22, 21.06, 23.69, 29.14]; |
48
|
|
|
|
|
|
|
$chitable[15] = [ 5.23, 7.26, 8.55, 11.72, 14.34, 17.32, 22.31, 25.00, 30.58]; |
49
|
|
|
|
|
|
|
$chitable[16] = [ 5.81, 7.96, 9.31, 12.62, 15.34, 18.42, 23.54, 26.30, 32.00]; |
50
|
|
|
|
|
|
|
$chitable[17] = [ 6.41, 8.67, 10.09, 13.53, 16.34, 19.51, 24.77, 27.59, 33.41]; |
51
|
|
|
|
|
|
|
$chitable[18] = [ 7.00, 9.39, 10.87, 14.44, 17.34, 20.60, 25.99, 28.87, 34.81]; |
52
|
|
|
|
|
|
|
$chitable[19] = [ 7.63, 10.12, 11.65, 15.35, 18.34, 21.69, 27.20, 30.14, 36.19]; |
53
|
|
|
|
|
|
|
$chitable[20] = [ 8.26, 10.85, 12.44, 16.27, 19.34, 22.78, 28.41, 31.41, 37.57]; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
$chilevels[$_] = [100, 99, 95, 90, 75, 50, 25, 10, 5, 1] foreach(21..30); |
56
|
|
|
|
|
|
|
# DCANTRELL's data 99% 95% 90% 75% 50% 25% 10% 5% 1% |
57
|
|
|
|
|
|
|
$chitable[21] = [ 8.90, 11.59, 13.24, 16.34, 20.34, 24.93, 29.62, 32.67, 38.93]; |
58
|
|
|
|
|
|
|
$chitable[22] = [ 9.54, 12.34, 14.04, 17.24, 21.34, 26.04, 30.81, 33.92, 40.29]; |
59
|
|
|
|
|
|
|
$chitable[23] = [10.20, 13.09, 14.85, 18.14, 22.34, 27.14, 32.01, 35.17, 41.64]; |
60
|
|
|
|
|
|
|
$chitable[24] = [10.86, 13.85, 15.66, 19.04, 23.34, 28.24, 33.20, 36.42, 42.98]; |
61
|
|
|
|
|
|
|
$chitable[25] = [11.52, 14.61, 16.47, 19.94, 24.34, 39.34, 34.38, 37.65, 44.31]; |
62
|
|
|
|
|
|
|
$chitable[26] = [12.20, 15.38, 17.29, 20.84, 25.34, 30.43, 35.56, 38.89, 45.64]; |
63
|
|
|
|
|
|
|
$chitable[27] = [12.87, 16.15, 18.11, 21.75, 26.34, 31.53, 36.74, 40.11, 46.96]; |
64
|
|
|
|
|
|
|
$chitable[28] = [13.56, 16.93, 18.94, 22.66, 27.34, 32.62, 37.92, 41.34, 48.28]; |
65
|
|
|
|
|
|
|
$chitable[29] = [14.26, 17.71, 19.77, 23.57, 28.34, 33.71, 39.09, 42.56, 49.59]; |
66
|
|
|
|
|
|
|
$chitable[30] = [14.95, 18.49, 20.60, 24.48, 29.34, 34.80, 40.26, 43.77, 50.89]; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# assume the expected probability distribution is uniform |
69
|
|
|
|
|
|
|
sub chisquare { |
70
|
3
|
|
|
3
|
1
|
182
|
my @data = @_; |
71
|
3
|
50
|
33
|
|
|
32
|
@data = @{$data[0]} if @data == 1 and ref($data[0]); |
|
0
|
|
|
|
|
0
|
|
72
|
3
|
50
|
|
|
|
9
|
return "There's no data!" unless @data; |
73
|
|
|
|
|
|
|
|
74
|
3
|
|
|
|
|
6
|
my $degrees_of_freedom = scalar(@data) - 1; |
75
|
3
|
|
|
|
|
8
|
my ($chisquare, $num_samples, $expected, $i) = (0, 0, 0, 0); |
76
|
3
|
50
|
|
|
|
9
|
if (! ref($chitable[$degrees_of_freedom])) { |
77
|
0
|
|
|
|
|
0
|
return "I can't handle ".scalar(@data)." choices without a better table."; |
78
|
|
|
|
|
|
|
} |
79
|
3
|
|
|
|
|
8
|
foreach (@data) { $num_samples += $_ } |
|
58
|
|
|
|
|
414
|
|
80
|
3
|
|
|
|
|
8
|
$expected = $num_samples / scalar(@data); |
81
|
|
|
|
|
|
|
# return "There's no data!" unless $expected; |
82
|
3
|
|
|
|
|
6
|
foreach (@data) { |
83
|
58
|
|
|
|
|
91
|
$chisquare += (($_ - $expected) ** 2) / $expected; |
84
|
|
|
|
|
|
|
} |
85
|
3
|
|
|
|
|
4
|
foreach (@{$chitable[$degrees_of_freedom]}) { |
|
3
|
|
|
|
|
9
|
|
86
|
11
|
100
|
|
|
|
101
|
if ($chisquare < $_) { |
87
|
2
|
|
|
|
|
84
|
return "There's a >".$chilevels[$degrees_of_freedom]->[$i+1]."% chance, ". |
88
|
|
|
|
|
|
|
"and a <".$chilevels[$degrees_of_freedom]->[$i]."% chance, that this data is random."; |
89
|
|
|
|
|
|
|
} |
90
|
9
|
|
|
|
|
15
|
$i++; |
91
|
|
|
|
|
|
|
} |
92
|
1
|
|
|
|
|
3
|
return "There's a <".(@{$chilevels[$degrees_of_freedom]})[-1]."% chance that this data is random."; |
|
1
|
|
|
|
|
10
|
|
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
1; |
96
|
|
|
|
|
|
|
__END__ |