| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Statistics::Gtest; |
|
4
|
|
|
|
|
|
|
|
|
5
|
4
|
|
|
4
|
|
131638
|
use strict; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
169
|
|
|
6
|
4
|
|
|
4
|
|
21
|
use vars qw($VERSION); |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
211
|
|
|
7
|
4
|
|
|
4
|
|
24
|
use Carp; |
|
|
4
|
|
|
|
|
13
|
|
|
|
4
|
|
|
|
|
481
|
|
|
8
|
4
|
|
|
4
|
|
3793
|
use IO::File; |
|
|
4
|
|
|
|
|
52907
|
|
|
|
4
|
|
|
|
|
10604
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
$VERSION = '0.07'; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $self; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub new { |
|
15
|
10
|
|
|
10
|
0
|
8350
|
my ($type, $data) = @_; |
|
16
|
10
|
|
|
|
|
197
|
my $datahandle; |
|
17
|
10
|
50
|
|
|
|
41
|
if (! $data) { |
|
18
|
0
|
|
|
|
|
0
|
_error(1); |
|
19
|
|
|
|
|
|
|
} |
|
20
|
10
|
50
|
|
|
|
44
|
if (@_ eq 3) { |
|
21
|
0
|
|
|
|
|
0
|
_error(2); |
|
22
|
|
|
|
|
|
|
} |
|
23
|
10
|
50
|
|
|
|
43
|
if ($datahandle = _getHandle($data)) { |
|
24
|
10
|
|
|
|
|
25
|
$self = {}; |
|
25
|
10
|
|
|
|
|
112
|
_initialize($datahandle); |
|
26
|
10
|
|
|
|
|
32
|
bless($self, $type); |
|
27
|
10
|
|
|
|
|
34
|
$self; |
|
28
|
|
|
|
|
|
|
} |
|
29
|
|
|
|
|
|
|
else { |
|
30
|
0
|
|
|
|
|
0
|
_error(3); |
|
31
|
|
|
|
|
|
|
} |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub getG { |
|
35
|
10
|
|
|
10
|
1
|
22
|
my $self = shift; |
|
36
|
10
|
|
|
|
|
29
|
$self->{'G'} = $self->getRawG() / $self->getQ(); |
|
37
|
10
|
|
|
|
|
51
|
$self->{'G'}; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub getRawG { |
|
41
|
20
|
|
|
20
|
1
|
38
|
my $self = shift; |
|
42
|
20
|
|
|
|
|
56
|
$self->{'logsum'} = _sumCellOp(); |
|
43
|
20
|
|
|
|
|
52
|
$self->{'G'} = 2 * $self->{'logsum'}; |
|
44
|
20
|
|
|
|
|
77
|
2 * $self->{'logsum'}; |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub getQ { |
|
48
|
20
|
|
|
20
|
1
|
1611
|
my $self = shift; |
|
49
|
20
|
|
|
|
|
56
|
$self->{'q'} = _williamsC(); |
|
50
|
20
|
|
|
|
|
92
|
$self->{'q'}; |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub setExpected { |
|
54
|
3
|
|
|
3
|
1
|
6
|
my ($self, $expData) = @_; |
|
55
|
3
|
|
|
|
|
7
|
my $datahandle; |
|
56
|
|
|
|
|
|
|
my @expect; |
|
57
|
3
|
|
|
|
|
4
|
my $expectTotal = 0; |
|
58
|
3
|
50
|
|
|
|
13
|
if (@_ eq 3) { |
|
59
|
0
|
|
|
|
|
0
|
_error(2); |
|
60
|
|
|
|
|
|
|
} |
|
61
|
3
|
50
|
|
|
|
12
|
if ($datahandle = _getHandle($expData)) { |
|
62
|
3
|
|
|
|
|
7
|
foreach my $row_ref (@{$datahandle}) { |
|
|
3
|
|
|
|
|
9
|
|
|
63
|
3
|
50
|
|
|
|
10
|
if (!ref $row_ref) { |
|
64
|
0
|
|
|
|
|
0
|
_error(4); |
|
65
|
|
|
|
|
|
|
} |
|
66
|
3
|
|
|
|
|
5
|
my @row = @{$row_ref}; |
|
|
3
|
|
|
|
|
11
|
|
|
67
|
3
|
|
|
|
|
7
|
foreach my $cell (@row) { |
|
68
|
12
|
|
|
|
|
22
|
_checkNumValidity($cell); |
|
69
|
12
|
|
|
|
|
21
|
push(@expect, $cell); |
|
70
|
12
|
|
|
|
|
20
|
$expectTotal += $cell; |
|
71
|
|
|
|
|
|
|
} |
|
72
|
3
|
|
|
|
|
7
|
$self->{'expected'} = \@expect; |
|
73
|
|
|
|
|
|
|
# Expected values no longer intrinsic to observed data, so set |
|
74
|
|
|
|
|
|
|
# flag to 0. |
|
75
|
3
|
|
|
|
|
15
|
$self->{'intrinsic'} = 0; |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
else { |
|
79
|
0
|
|
|
|
|
0
|
_error(3); |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
# Data sanity checks |
|
82
|
3
|
50
|
|
|
|
20
|
if ($expectTotal != $self->{'sumtotal'}) { |
|
83
|
0
|
|
|
|
|
0
|
warn "Warning: Total of expected values ($expectTotal) does not ", |
|
84
|
|
|
|
|
|
|
"equal total of observed values (",$self->{'sumtotal'},").\nThis ", |
|
85
|
|
|
|
|
|
|
"will invalidate the test unless the discrepancy is very minor ", |
|
86
|
|
|
|
|
|
|
"\n(e.g., the result of rounding error).\n"; |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub getObserved { |
|
91
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
92
|
0
|
|
|
|
|
0
|
my $exp = _formatData($self->{'observed'}); |
|
93
|
0
|
|
|
|
|
0
|
$exp; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub getExpected { |
|
97
|
6
|
|
|
6
|
1
|
16
|
my $self = shift; |
|
98
|
6
|
|
|
|
|
29
|
my $exp = _formatData($self->{'expected'}); |
|
99
|
6
|
|
|
|
|
38
|
$exp; |
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub getDF { |
|
103
|
10
|
|
|
10
|
1
|
23671
|
my $self = shift; |
|
104
|
10
|
|
|
|
|
97
|
$self->{'df'}; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub setDF { |
|
108
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
109
|
0
|
|
|
|
|
0
|
$self->{'df'} = shift; |
|
110
|
|
|
|
|
|
|
# Degrees of freedom set externally. |
|
111
|
0
|
|
|
|
|
0
|
$self->{'df_extern'} = 1; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub getDFstate { |
|
115
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
116
|
0
|
|
|
|
|
0
|
$self->{'df_extern'}; |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub getHypothesis { |
|
120
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
121
|
0
|
|
|
|
|
0
|
$self->{'intrinsic'}; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub getRow { |
|
125
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
126
|
0
|
|
|
|
|
0
|
my $idx = shift; |
|
127
|
0
|
0
|
|
|
|
0
|
if ($idx > ($self->{'nrows'} - 1)) { |
|
128
|
0
|
|
|
|
|
0
|
return; |
|
129
|
|
|
|
|
|
|
} |
|
130
|
0
|
|
|
|
|
0
|
my @row = (); |
|
131
|
0
|
|
|
|
|
0
|
my $skip = $self->{'ncols'}; |
|
132
|
0
|
|
|
|
|
0
|
my $offset = $idx * $skip; |
|
133
|
0
|
|
|
|
|
0
|
my $final = $offset + $skip; |
|
134
|
0
|
|
|
|
|
0
|
for my $cell ($offset .. $final - 1) { |
|
135
|
0
|
|
|
|
|
0
|
push(@row, $self->{'observed'}->[$cell]); |
|
136
|
|
|
|
|
|
|
} |
|
137
|
0
|
|
|
|
|
0
|
\@row; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub getCol { |
|
141
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
142
|
0
|
|
|
|
|
0
|
my $idx = shift; |
|
143
|
0
|
0
|
|
|
|
0
|
if ($idx > ($self->{'ncols'} - 1)) { |
|
144
|
0
|
|
|
|
|
0
|
return; |
|
145
|
|
|
|
|
|
|
} |
|
146
|
0
|
|
|
|
|
0
|
my @col = (); |
|
147
|
0
|
|
|
|
|
0
|
my $skip = $self->{'ncols'}; |
|
148
|
0
|
|
|
|
|
0
|
my $max = $skip * $self->{'nrows'}; |
|
149
|
0
|
|
|
|
|
0
|
for (my $cell = $idx; $cell < $max; $cell += $skip) { |
|
150
|
0
|
|
|
|
|
0
|
push(@col,$self->{'observed'}->[$cell]); |
|
151
|
|
|
|
|
|
|
} |
|
152
|
0
|
|
|
|
|
0
|
\@col; |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub rowSum { |
|
156
|
20
|
|
|
20
|
1
|
45
|
my $self = shift; |
|
157
|
20
|
|
|
|
|
38
|
my $idx = shift; |
|
158
|
20
|
|
|
|
|
127
|
$self->{'rowsums'}->[$idx]; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub colSum { |
|
162
|
20
|
|
|
20
|
1
|
38
|
my $self = shift; |
|
163
|
20
|
|
|
|
|
31
|
my $idx = shift; |
|
164
|
20
|
|
|
|
|
105
|
$self->{'colsums'}->[$idx]; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub getRowNum { |
|
168
|
10
|
|
|
10
|
1
|
30
|
my $self = shift; |
|
169
|
10
|
|
|
|
|
53
|
$self->{'nrows'}; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub getColNum { |
|
173
|
10
|
|
|
10
|
1
|
23
|
my $self = shift; |
|
174
|
10
|
|
|
|
|
60
|
$self->{'ncols'}; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub getSumTotal { |
|
178
|
10
|
|
|
10
|
1
|
24
|
my $self = shift; |
|
179
|
10
|
|
|
|
|
56
|
$self->{'sumtotal'}; |
|
180
|
|
|
|
|
|
|
} |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# private methods |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# Do all the initialization work: verify the data and write it into the |
|
185
|
|
|
|
|
|
|
# object; calculate the row and column sums and grand total; determine |
|
186
|
|
|
|
|
|
|
# the format of the data table (rows x columns); calculate the likely |
|
187
|
|
|
|
|
|
|
# degrees of freedom and generate default expected values. Set state |
|
188
|
|
|
|
|
|
|
# flags indicating that the expected values are 'intrinsic' (generated |
|
189
|
|
|
|
|
|
|
# from the observed data), and that the degrees of freedom are also |
|
190
|
|
|
|
|
|
|
# based on table characteristics. |
|
191
|
|
|
|
|
|
|
sub _initialize { |
|
192
|
10
|
|
|
10
|
|
20
|
my $datahandle = shift; |
|
193
|
10
|
|
|
|
|
20
|
my @datastruct = (); |
|
194
|
10
|
|
|
|
|
16
|
my @rowsums = (); |
|
195
|
10
|
|
|
|
|
18
|
my @sumcols = (); |
|
196
|
10
|
|
|
|
|
17
|
my ($ncols, $nrows, $sumtotal); |
|
197
|
10
|
|
|
|
|
43
|
my $totalN = 0; |
|
198
|
10
|
|
|
|
|
33
|
$self->{'colsums'} = (); |
|
199
|
|
|
|
|
|
|
|
|
200
|
10
|
|
|
|
|
24
|
foreach my $row_ref (@{$datahandle}) { |
|
|
10
|
|
|
|
|
25
|
|
|
201
|
17
|
50
|
|
|
|
53
|
if (!ref $row_ref) { |
|
202
|
0
|
|
|
|
|
0
|
_error(4); |
|
203
|
|
|
|
|
|
|
} |
|
204
|
17
|
|
|
|
|
23
|
my @row = @{$row_ref}; |
|
|
17
|
|
|
|
|
58
|
|
|
205
|
17
|
|
|
|
|
27
|
$ncols = scalar @row; |
|
206
|
17
|
|
|
|
|
22
|
my $i = 0; |
|
207
|
17
|
|
|
|
|
30
|
foreach my $cell (@row) { |
|
208
|
46
|
|
|
|
|
87
|
_checkNumValidity($cell); |
|
209
|
46
|
|
|
|
|
72
|
push(@datastruct, $cell); |
|
210
|
46
|
|
|
|
|
80
|
$sumtotal += $cell; |
|
211
|
46
|
|
|
|
|
96
|
$self->{'colsums'}->[$i] += $cell; |
|
212
|
46
|
|
|
|
|
70
|
$i++; |
|
213
|
46
|
|
|
|
|
96
|
$totalN++; |
|
214
|
|
|
|
|
|
|
} |
|
215
|
17
|
|
|
|
|
27
|
$nrows++; |
|
216
|
17
|
|
|
|
|
46
|
push(@rowsums, _rowsum(\@row)); |
|
217
|
|
|
|
|
|
|
} |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# Data sanity checks; make sure data exist and are internally |
|
220
|
|
|
|
|
|
|
# consistent. |
|
221
|
10
|
50
|
|
|
|
34
|
if ($totalN == 0) { |
|
222
|
0
|
|
|
|
|
0
|
_error(5); |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
|
|
225
|
10
|
50
|
|
|
|
34
|
if ($totalN != ($nrows * $ncols)) { |
|
226
|
0
|
|
|
|
|
0
|
_error(6); |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
10
|
50
|
33
|
|
|
82
|
if (($ncols == 0) || ($nrows == 0)) { |
|
230
|
0
|
|
|
|
|
0
|
_error(7); |
|
231
|
|
|
|
|
|
|
} |
|
232
|
|
|
|
|
|
|
|
|
233
|
10
|
|
|
|
|
30
|
my $tSum = 0; |
|
234
|
10
|
|
|
|
|
27
|
foreach my $r (@rowsums) { |
|
235
|
17
|
|
|
|
|
32
|
$tSum += $r; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
10
|
50
|
|
|
|
47
|
if ($sumtotal != $tSum) { |
|
238
|
0
|
|
|
|
|
0
|
_error(6); |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
|
|
241
|
10
|
|
|
|
|
14
|
$tSum = 0; |
|
242
|
10
|
|
|
|
|
17
|
foreach my $r (@{$self->{'colsums'}}) { |
|
|
10
|
|
|
|
|
39
|
|
|
243
|
32
|
|
|
|
|
47
|
$tSum += $r; |
|
244
|
|
|
|
|
|
|
} |
|
245
|
10
|
50
|
|
|
|
39
|
if ($sumtotal != $tSum) { |
|
246
|
0
|
|
|
|
|
0
|
_error(6); |
|
247
|
|
|
|
|
|
|
} |
|
248
|
|
|
|
|
|
|
|
|
249
|
10
|
|
|
|
|
26
|
$self->{'observed'} = \@datastruct; |
|
250
|
10
|
|
|
|
|
25
|
$self->{'rowsums'} = \@rowsums; |
|
251
|
10
|
|
|
|
|
20
|
$self->{'nrows'} = $nrows; |
|
252
|
10
|
|
|
|
|
21
|
$self->{'ncols'} = $ncols; |
|
253
|
10
|
|
|
|
|
22
|
$self->{'sumtotal'} = $sumtotal; |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# tabletype: 0 = 2-way, 1 = single column, 2 = single row |
|
256
|
10
|
100
|
100
|
|
|
56
|
if (($ncols == 1) || ($nrows == 1)) { |
|
257
|
6
|
100
|
|
|
|
28
|
if ($ncols == 1) { |
|
258
|
1
|
|
|
|
|
3
|
$self->{'df'} = $nrows - 1; |
|
259
|
1
|
|
|
|
|
4
|
$self->{'tabletype'} = 1; |
|
260
|
|
|
|
|
|
|
} |
|
261
|
6
|
100
|
|
|
|
21
|
if ($nrows == 1) { |
|
262
|
5
|
|
|
|
|
14
|
$self->{'df'} = $ncols - 1; |
|
263
|
5
|
|
|
|
|
18
|
$self->{'tabletype'} = 2; |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
} |
|
266
|
|
|
|
|
|
|
else { |
|
267
|
4
|
|
|
|
|
14
|
$self->{'df'} = ($nrows - 1) * ($ncols - 1); |
|
268
|
4
|
|
|
|
|
13
|
$self->{'tabletype'} = 0; |
|
269
|
|
|
|
|
|
|
} |
|
270
|
10
|
|
|
|
|
35
|
$self->{'expected'}= _expected(\@datastruct); |
|
271
|
10
|
|
|
|
|
20
|
$self->{'intrinsic'} = 1; |
|
272
|
10
|
|
|
|
|
41
|
$self->{'df_extern'} = 0; |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# Calculate and return William's correction, q. Calculation varies |
|
276
|
|
|
|
|
|
|
# depending on table format (1-way vs. 2-way). |
|
277
|
|
|
|
|
|
|
sub _williamsC { |
|
278
|
20
|
|
|
20
|
|
41
|
my $sumtotal = $self->{'sumtotal'}; |
|
279
|
20
|
|
|
|
|
43
|
my $rowsums = $self->{'rowsums'}; |
|
280
|
20
|
|
|
|
|
39
|
my $colsums = $self->{'colsums'}; |
|
281
|
20
|
100
|
|
|
|
94
|
if ($self->{'tabletype'} == 1) { |
|
|
|
100
|
|
|
|
|
|
|
282
|
2
|
|
|
|
|
12
|
$self->{'q'} = |
|
283
|
|
|
|
|
|
|
1 + ($self->{'nrows'} + 1) / (6 * $sumtotal); |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
elsif ($self->{'tabletype'} == 2) { |
|
286
|
10
|
|
|
|
|
56
|
$self->{'q'} = |
|
287
|
|
|
|
|
|
|
1 + ($self->{'ncols'} + 1) / (6 * $sumtotal); |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
else { |
|
290
|
8
|
|
|
|
|
21
|
my $recipRows = 0; |
|
291
|
8
|
|
|
|
|
13
|
my $recipCols = 0; |
|
292
|
8
|
|
|
|
|
16
|
foreach my $rval (@{$rowsums}) { |
|
|
8
|
|
|
|
|
19
|
|
|
293
|
16
|
|
|
|
|
43
|
$recipRows += (1 / $rval); |
|
294
|
|
|
|
|
|
|
} |
|
295
|
8
|
|
|
|
|
16
|
foreach my $cval (@{$colsums}) { |
|
|
8
|
|
|
|
|
18
|
|
|
296
|
22
|
|
|
|
|
85
|
$recipCols += (1 / $cval); |
|
297
|
|
|
|
|
|
|
} |
|
298
|
8
|
|
|
|
|
26
|
my $denom = 6 * $sumtotal * $self->{'df'}; |
|
299
|
8
|
|
|
|
|
49
|
my $num = ($sumtotal * $recipRows - 1) * ($sumtotal * $recipCols - 1); |
|
300
|
8
|
|
|
|
|
39
|
$self->{'q'} = 1 + $num / $denom; |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
} |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# Generate the default expected values, based on a hypothesis of |
|
305
|
|
|
|
|
|
|
# independence (no association) between factors. |
|
306
|
|
|
|
|
|
|
# Return ref to expected array. |
|
307
|
|
|
|
|
|
|
sub _expected { |
|
308
|
10
|
|
|
10
|
|
17
|
my $data = shift; |
|
309
|
10
|
|
|
|
|
19
|
my @expected = (); |
|
310
|
10
|
|
|
|
|
19
|
my $rows = $self->{'nrows'}; |
|
311
|
10
|
|
|
|
|
20
|
my $cols = $self->{'ncols'}; |
|
312
|
10
|
|
|
|
|
14
|
my $sumtotal = $self->{'sumtotal'}; |
|
313
|
10
|
|
|
|
|
35
|
for my $row (0 .. $rows-1) { |
|
314
|
17
|
|
|
|
|
46
|
my $marginalRow = $self->{'rowsums'}->[$row] / $sumtotal; |
|
315
|
17
|
|
|
|
|
19
|
my @a; |
|
316
|
17
|
|
|
|
|
32
|
for my $col (0 .. $cols-1) { |
|
317
|
46
|
|
|
|
|
72
|
my $marginalCol = $self->{'colsums'}->[$col] / $sumtotal; |
|
318
|
46
|
|
|
|
|
114
|
push(@expected, ($marginalRow * $marginalCol * $sumtotal)); |
|
319
|
|
|
|
|
|
|
} |
|
320
|
|
|
|
|
|
|
} |
|
321
|
10
|
|
|
|
|
38
|
\@expected; |
|
322
|
|
|
|
|
|
|
} |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Calculate the log-likelihood statistic. |
|
325
|
|
|
|
|
|
|
# Return sum of Obs * ln(Obs/Exp) for all cells. |
|
326
|
|
|
|
|
|
|
sub _sumCellOp { |
|
327
|
20
|
|
|
20
|
|
59
|
my $logsum = 0; |
|
328
|
20
|
|
|
|
|
59
|
my $ncells = $self->{'nrows'} * $self->{'ncols'}; |
|
329
|
20
|
|
|
|
|
35
|
my $o = $self->{'observed'}; |
|
330
|
20
|
|
|
|
|
34
|
my $e = $self->{'expected'}; |
|
331
|
20
|
|
|
|
|
57
|
for my $cell (0 .. $ncells-1) { |
|
332
|
92
|
|
|
|
|
320
|
$logsum += $o->[$cell] * log($o->[$cell] / $e->[$cell]); |
|
333
|
|
|
|
|
|
|
} |
|
334
|
20
|
|
|
|
|
63
|
$logsum; |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# Return sum of the cells in this row. |
|
338
|
|
|
|
|
|
|
sub _rowsum { |
|
339
|
17
|
|
|
17
|
|
24
|
my $row = shift; |
|
340
|
17
|
|
|
|
|
23
|
my $total = 0; |
|
341
|
17
|
|
|
|
|
43
|
foreach my $val (@{$row}) { |
|
|
17
|
|
|
|
|
29
|
|
|
342
|
46
|
|
|
|
|
61
|
$total += $val; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
17
|
|
|
|
|
61
|
$total; |
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# Order the data array back into the format originally input. |
|
348
|
|
|
|
|
|
|
sub _formatData { |
|
349
|
6
|
|
|
6
|
|
9
|
my $ref = shift; |
|
350
|
6
|
|
|
|
|
13
|
my @data; |
|
351
|
6
|
|
|
|
|
10
|
my $len = scalar @{$ref}; |
|
|
6
|
|
|
|
|
13
|
|
|
352
|
6
|
|
|
|
|
22
|
my $rlen = $len / $self->{'nrows'}; |
|
353
|
6
|
|
|
|
|
14
|
my $idx = $rlen - 1; |
|
354
|
6
|
|
|
|
|
31
|
for (my $r=0; $r<$len; $r+=$rlen) { |
|
355
|
9
|
|
|
|
|
25
|
my $row = [ @{$ref}[$r ... $idx] ]; |
|
|
9
|
|
|
|
|
34
|
|
|
356
|
9
|
100
|
|
|
|
40
|
return $row if ($rlen eq $len); |
|
357
|
6
|
|
|
|
|
11
|
push(@data, $row); |
|
358
|
6
|
|
|
|
|
18
|
$idx += $rlen; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
3
|
|
|
|
|
11
|
\@data; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Check to make sure the values are numbers, greater than 0. |
|
364
|
|
|
|
|
|
|
# Throw error if any problems are found. |
|
365
|
|
|
|
|
|
|
sub _checkNumValidity { |
|
366
|
58
|
|
|
58
|
|
82
|
my $value = shift; |
|
367
|
58
|
|
|
|
|
67
|
my $eString = "Invalid data."; |
|
368
|
58
|
50
|
|
|
|
261
|
if ($value !~ /^\d+\.?\d*$/) { |
|
369
|
0
|
|
|
|
|
0
|
_error(8); |
|
370
|
|
|
|
|
|
|
} |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# Very low cell counts (< 5) aren't great, either. Maybe a |
|
373
|
|
|
|
|
|
|
# warning is in order. |
|
374
|
58
|
50
|
|
|
|
131
|
if ($value < 5) { |
|
375
|
0
|
|
|
|
|
0
|
warn "Warning: Very small cell frequency (freq. < 5) found.\n", |
|
376
|
|
|
|
|
|
|
"This will reduce the reliability of the test.\n"; |
|
377
|
|
|
|
|
|
|
} |
|
378
|
58
|
50
|
|
|
|
133
|
if ($value == 0) { |
|
379
|
0
|
|
|
|
|
0
|
_error(9); |
|
380
|
|
|
|
|
|
|
} |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# Take a filehandle, return an array reference. |
|
384
|
|
|
|
|
|
|
sub _fileread { |
|
385
|
6
|
|
|
6
|
|
13
|
my $data = shift; |
|
386
|
6
|
|
|
|
|
8
|
my @a; |
|
387
|
6
|
|
|
|
|
292
|
while (<$data>) { |
|
388
|
9
|
|
|
|
|
47
|
my @row = split(/\s+/,$_); |
|
389
|
9
|
|
|
|
|
64
|
push(@a, \@row); |
|
390
|
|
|
|
|
|
|
} |
|
391
|
6
|
|
|
|
|
122
|
\@a; |
|
392
|
|
|
|
|
|
|
} |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# Try to take whatever input format is thrown at us, and turn it into |
|
395
|
|
|
|
|
|
|
# an array reference, or just bail out completely if we can't figure |
|
396
|
|
|
|
|
|
|
# it out. |
|
397
|
|
|
|
|
|
|
sub _getHandle { |
|
398
|
13
|
|
|
13
|
|
27
|
my $data = shift; |
|
399
|
13
|
100
|
|
|
|
60
|
if (ref $data) { |
|
400
|
7
|
100
|
|
|
|
45
|
if ($data =~ /\AARRAY/) { |
|
401
|
3
|
100
|
|
|
|
11
|
if (ref $data->[0]) { |
|
402
|
2
|
|
|
|
|
7
|
return $data; |
|
403
|
|
|
|
|
|
|
} |
|
404
|
|
|
|
|
|
|
else { |
|
405
|
1
|
|
|
|
|
4
|
return [ $data ]; |
|
406
|
|
|
|
|
|
|
} |
|
407
|
|
|
|
|
|
|
} |
|
408
|
4
|
100
|
|
|
|
22
|
if ($data =~ /\AGLOB/) { |
|
409
|
2
|
|
|
|
|
6
|
return _fileread($data); |
|
410
|
|
|
|
|
|
|
} |
|
411
|
2
|
50
|
|
|
|
12
|
if ($data =~ /\AIO::File=GLOB/) { |
|
412
|
2
|
|
|
|
|
8
|
return _fileread($data); |
|
413
|
|
|
|
|
|
|
} |
|
414
|
|
|
|
|
|
|
else { |
|
415
|
0
|
|
|
|
|
0
|
_error(10); |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
else { |
|
419
|
6
|
|
|
|
|
57
|
my $fh = new IO::File; |
|
420
|
6
|
100
|
|
|
|
278
|
if ($fh->open("< $data")) { |
|
421
|
2
|
|
|
|
|
156
|
return _fileread($fh); |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
else { |
|
424
|
4
|
|
|
|
|
140
|
my @a = split(/\s+/,$data); |
|
425
|
4
|
|
|
|
|
27
|
return [ \@a ]; |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
} |
|
428
|
0
|
|
|
|
|
|
undef $fh; |
|
429
|
|
|
|
|
|
|
} |
|
430
|
0
|
|
|
|
|
|
0; |
|
431
|
|
|
|
|
|
|
} |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# Keep all the error strings in one place. |
|
434
|
|
|
|
|
|
|
sub _error { |
|
435
|
0
|
|
|
0
|
|
|
my ($code) = shift; |
|
436
|
0
|
|
|
|
|
|
my $errorbar = "\n!-----------------!\n"; |
|
437
|
0
|
|
|
|
|
|
my @e = ( |
|
438
|
|
|
|
|
|
|
"", # Normal execution |
|
439
|
|
|
|
|
|
|
"Input error: Must pass in a filename for a file containing the data.\n\tEx: \$g = new Statistics::Gtest(\$datafile);", |
|
440
|
|
|
|
|
|
|
"Input error: Can't use array or hash as data; pass array ref.", |
|
441
|
|
|
|
|
|
|
"Input error: Invalid data format.", |
|
442
|
|
|
|
|
|
|
"Input error: Data must be structured as array of array refs, with each array ref |
|
443
|
|
|
|
|
|
|
pointing to one row of data.", |
|
444
|
|
|
|
|
|
|
"Data inconsistency: No data found in table.", |
|
445
|
|
|
|
|
|
|
"Data inconsistency: sum of table values does not match total.", |
|
446
|
|
|
|
|
|
|
"Data inconsistency: could not read row or column.", |
|
447
|
|
|
|
|
|
|
"Data invalid: All data must be positive integers.", |
|
448
|
|
|
|
|
|
|
"Data invalid: Found a zero frequency value. Can't continue.", |
|
449
|
|
|
|
|
|
|
"Input error: Reference to invalid data type; must be reference to array.", |
|
450
|
|
|
|
|
|
|
); |
|
451
|
|
|
|
|
|
|
|
|
452
|
0
|
|
|
|
|
|
print $errorbar; |
|
453
|
0
|
|
|
|
|
|
print "\n Error",Carp::shortmess(); |
|
454
|
0
|
|
|
|
|
|
print " ", $e[$code], "\n"; |
|
455
|
0
|
|
|
|
|
|
print Carp::longmess(" Stack trace:\n"); |
|
456
|
0
|
|
|
|
|
|
print $errorbar; |
|
457
|
0
|
|
|
|
|
|
exit $code; |
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
1; |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
__END__ |