line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::Distributions::GTest; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
29201
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
5
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
95
|
|
6
|
1
|
|
|
1
|
|
4
|
use List::Util qw( sum max ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
125
|
|
7
|
1
|
|
|
1
|
|
742
|
use Math::Cephes qw(:explog); |
|
1
|
|
|
|
|
7004
|
|
|
1
|
|
|
|
|
299
|
|
8
|
1
|
|
|
1
|
|
837
|
use Statistics::Distributions qw( chisqrdistr chisqrprob ); |
|
1
|
|
|
|
|
3303
|
|
|
1
|
|
|
|
|
99
|
|
9
|
1
|
|
|
1
|
|
851
|
use Text::SimpleTable; |
|
1
|
|
|
|
|
3342
|
|
|
1
|
|
|
|
|
28
|
|
10
|
1
|
|
|
1
|
|
1382
|
use Contextual::Return; |
|
1
|
|
|
|
|
34564
|
|
|
1
|
|
|
|
|
8
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Statistics::Distributions::GTest - Perl implementation of the Log-Likelihood Ratio Test (G-test) of Independence. |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=cut |
16
|
|
|
|
|
|
|
=head1 VERSION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
This document describes Statistics::Distributions::GTest version 0.1.5. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=cut |
21
|
1
|
|
|
1
|
|
2865
|
use version; our $VERSION = qv('0.1.5'); # next release 0.1.1... |
|
1
|
|
|
|
|
1904
|
|
|
1
|
|
|
|
|
4
|
|
22
|
|
|
|
|
|
|
=head1 SYNOPSIS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
use Statistics::Distributions::GTest; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Create an GTest object. |
27
|
|
|
|
|
|
|
my $gtest = Statistics::Distributions::GTest->new(); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# A 3x3 example. Data is sent to object a reference to a LoL. |
30
|
|
|
|
|
|
|
my $a_ref = [ |
31
|
|
|
|
|
|
|
[ 458, 537 ,345], |
32
|
|
|
|
|
|
|
[ 385, 457 ,456], |
33
|
|
|
|
|
|
|
[ 332, 376 ,364 ], |
34
|
|
|
|
|
|
|
]; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# Feed the object the data by passing reference with named argument 'table'. |
37
|
|
|
|
|
|
|
$gtest->read_data ( { table => $a_ref } ); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Perform the analysis using one of the two methods - see DESCRIPTION. |
40
|
|
|
|
|
|
|
$gtest->G(); |
41
|
|
|
|
|
|
|
#$gtest->G_alt(); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Print a table of the calculated expected values. |
44
|
|
|
|
|
|
|
$gtest->print_expected(); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# To access results use results method. The return of this method is context dependent (see METHODS). |
47
|
|
|
|
|
|
|
# To print a report to STDOUT call results in VOID context - may also call in BOOLEAN, NUMERIC and LIST (see METHODS). |
48
|
|
|
|
|
|
|
$gtest->results(); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=cut |
51
|
|
|
|
|
|
|
=head1 DESCRIPTION |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
The G-test of independence is an alternative to the chi-square test of independence for testing for independence in |
54
|
|
|
|
|
|
|
contingency tables. G-tests are coming into increasing use and as with the chi-square test for independence the G-test |
55
|
|
|
|
|
|
|
for independence is used when you have two nominal variables each with two or more possible values. The null hypothesis |
56
|
|
|
|
|
|
|
is that the relative proportions of one variable are independent of the second variable. This module implements two two |
57
|
|
|
|
|
|
|
equivalent, but marginally different approaches to calculate G scores (that described in |
58
|
|
|
|
|
|
|
http://en.wikipedia.org/wiki/G-test and that used by http://udel.edu/~mcdonald/statgtestind.html). Benchmarking |
59
|
|
|
|
|
|
|
indicates that first approach works about a third faster than the alternative. However, this difference |
60
|
|
|
|
|
|
|
diminishes as the categories increase. See http://en.wikipedia.org/wiki/G-test and |
61
|
|
|
|
|
|
|
http://udel.edu/~mcdonald/statgtestind.html. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=cut |
64
|
|
|
|
|
|
|
=head1 METHODS |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
####################################################################################################################### |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
sub new { |
71
|
|
|
|
|
|
|
|
72
|
0
|
|
|
0
|
0
|
|
my ($class, $all_h_ref ) = @_; |
73
|
0
|
0
|
0
|
|
|
|
croak qq{\nArguments must be passed as HASH reference.} |
74
|
|
|
|
|
|
|
if ( ( $all_h_ref ) && ( ref $all_h_ref ne q{HASH} ) ); |
75
|
0
|
|
|
|
|
|
my $self = {}; |
76
|
0
|
|
|
|
|
|
bless $self, $class; |
77
|
0
|
|
|
|
|
|
return $self; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
=head2 new |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Create a new Statistics::Distributions::GTest object. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
my $gtest = Statistics::Distributions::GTest->new(); |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
sub read_data { |
88
|
0
|
|
|
0
|
0
|
|
my ( $self, $all_h_ref ) = @_; |
89
|
|
|
|
|
|
|
|
90
|
0
|
0
|
|
|
|
|
$all_h_ref or croak qq{\nYou must pass me some data}; |
91
|
0
|
0
|
0
|
|
|
|
croak qq{\nThe data must be passed within HASH reference pointed to by key \'table\'.} |
92
|
|
|
|
|
|
|
if ( ( ref $all_h_ref ne q{HASH} ) || ( !exists $all_h_ref->{table} ) ); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
#y unpack data |
95
|
0
|
|
|
|
|
|
my $data_a_ref = $all_h_ref->{table}; |
96
|
|
|
|
|
|
|
|
97
|
0
|
0
|
|
|
|
|
$data_a_ref or croak qq{\nkey \'table\' points to nothing}; |
98
|
0
|
0
|
|
|
|
|
croak qq{\nThe data pointed to by key \'table\' must be passed as ARRAY reference.} if ( ref $data_a_ref ne q{ARRAY} ); |
99
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
$self->_data_checks($data_a_ref); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
#r/ WE MUST DEEP COPY THIS STRUCTURE |
103
|
0
|
|
|
|
|
|
my $deep_copied_ref = _deep_copy_arrays ($data_a_ref); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
#y convert the numeric values to hashes that store various values |
106
|
0
|
|
|
|
|
|
$self->_inplace_conversion($deep_copied_ref); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#y simple flag for data reading |
109
|
0
|
|
|
|
|
|
$self->{properties}{analysis}{obs_table} = 1; |
110
|
0
|
|
|
|
|
|
return; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
=head2 read_data |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Used for loading data into object. Data is fed as a reference to a list of lists within an anonymous hash using the |
115
|
|
|
|
|
|
|
named argument 'table'. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
$gtest->read_data ( { table => $LoL_ref } ); |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub _data_checks { |
122
|
0
|
|
|
0
|
|
|
my ($self, $data_a_ref) = @_; |
123
|
|
|
|
|
|
|
#/ get rows |
124
|
0
|
|
|
|
|
|
my $rows = scalar(@{$data_a_ref}); |
|
0
|
|
|
|
|
|
|
125
|
0
|
0
|
0
|
|
|
|
croak qq{\nI need some data - there are too few rows in your data.\n} if ( !$rows || $rows == 1 ); |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
#/ get cols - check first and then compare the rest |
128
|
0
|
|
|
|
|
|
my $cols = scalar(@{$data_a_ref->[0]}); |
|
0
|
|
|
|
|
|
|
129
|
0
|
0
|
0
|
|
|
|
croak qq{\nI need some data - there are too few columns in your data.\n} if ( !$cols || $cols == 1 ); |
130
|
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
|
for my $row (@{$data_a_ref}) { |
|
0
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
133
|
0
|
0
|
|
|
|
|
croak qq{\n\nData set must be passed as ARRAY references.\n} if ( ref $row ne q{ARRAY} ); |
134
|
0
|
0
|
|
|
|
|
croak qq{\n\nAll rows must have the same number of columns.\n} if ( scalar( @{$row} ) != $cols ); |
|
0
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# feed the object |
139
|
0
|
|
|
|
|
|
$self->{properties}{rows} = $rows; |
140
|
0
|
|
|
|
|
|
$self->{properties}{cols} = $cols; |
141
|
|
|
|
|
|
|
|
142
|
0
|
|
|
|
|
|
return; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
#/ beware: this is a prototype and only used for deep-copying - don´t return at end as its called recursively and you´ll kill the deep copy |
146
|
|
|
|
|
|
|
sub _deep_copy_arrays (\@) { |
147
|
0
|
|
|
0
|
|
|
my $data_structure = shift; |
148
|
|
|
|
|
|
|
|
149
|
0
|
0
|
|
|
|
|
if (!ref $data_structure) { $data_structure } |
|
0
|
0
|
|
|
|
|
|
150
|
|
|
|
|
|
|
elsif (ref $data_structure eq q{ARRAY} ) { |
151
|
0
|
|
|
|
|
|
[ map { _deep_copy_arrays ($_) } @{$data_structure} ]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
} |
153
|
0
|
|
|
|
|
|
else { croak qq{\nYou must hand in an array ref. } } |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
#/ THIS METHOD IS CALLED RECURSIVELY! DON´T PUT RETURN IN HERE |
156
|
|
|
|
|
|
|
# return; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub _inplace_conversion { |
160
|
|
|
|
|
|
|
#/ in place converstion to 2-d matrix to cells |
161
|
0
|
|
|
0
|
|
|
my ($self, $a_ref) = @_; |
162
|
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
for my $row (0..$#{$a_ref}) { |
|
0
|
|
|
|
|
|
|
164
|
0
|
|
|
|
|
|
for my $col (0..$#{$a_ref->[0]}) { |
|
0
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# unpack value |
167
|
0
|
|
|
|
|
|
my $observation = $a_ref->[$row][$col]; |
168
|
|
|
|
|
|
|
|
169
|
0
|
|
|
|
|
|
my $cell_h_ref = { observation => $observation, |
170
|
|
|
|
|
|
|
}; |
171
|
|
|
|
|
|
|
|
172
|
0
|
|
|
|
|
|
$a_ref->[$row][$col] = $cell_h_ref; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
#y feed the object the deep-copied inplace modified array |
177
|
0
|
|
|
|
|
|
print qq{\n\nData passed all checks. Feeding $self->{properties}{rows} x $self->{properties}{cols} matrix to object.\n}; |
178
|
0
|
|
|
|
|
|
$self->{table} = $a_ref; |
179
|
|
|
|
|
|
|
|
180
|
0
|
|
|
|
|
|
return; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub diag { |
184
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
185
|
0
|
|
|
|
|
|
print qq{\n-------------------------------------------------------\nobject dumper:\n}, Dumper $self; |
186
|
0
|
|
|
|
|
|
print qq{-------------------------------------------------------\n\n}; |
187
|
|
|
|
|
|
|
|
188
|
0
|
|
|
|
|
|
return; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub _sum_a_row { |
192
|
0
|
|
|
0
|
|
|
my ( $self, $row ) = @_; # para de esquecer a colocar @_ quando unpacking!!! |
193
|
0
|
|
|
|
|
|
my $table_a_ref = $self->{table}; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
#y basically the NW way |
196
|
0
|
|
|
|
|
|
my $row_sum = sum map { $table_a_ref->[$row][$_]{observation} } ( 0..($self->{properties}{cols}-1) ); |
|
0
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
#y my way |
198
|
0
|
|
|
|
|
|
my $row_sum2 = sum map { $_->{observation} } @{$self->{table}[$row]}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
#y basically the NW way |
201
|
|
|
|
|
|
|
# my $col_sum1 = sum map { $table_a_ref->[$_][$col]{observation} } ( 0..($self->{properties}{rows}-1) ); |
202
|
|
|
|
|
|
|
#y my way |
203
|
|
|
|
|
|
|
# my $col_sum2 = sum map { $_->[$col]{observation} } @{$self->{table}}; |
204
|
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
|
return $row_sum; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub _sum_rows { |
209
|
|
|
|
|
|
|
|
210
|
0
|
|
|
0
|
|
|
my $self = shift; |
211
|
0
|
|
|
|
|
|
my $table_a_ref = $self->{table}; |
212
|
0
|
|
|
|
|
|
my @row_sums = (); |
213
|
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
|
for my $row (0..$#{$table_a_ref}) { |
|
0
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
216
|
0
|
|
|
|
|
|
push @row_sums, $self->_sum_a_row($row); |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
|
$self->{properties}{row_sums} = [@row_sums]; |
221
|
|
|
|
|
|
|
|
222
|
0
|
|
|
|
|
|
return; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub _sum_a_col { |
226
|
0
|
|
|
0
|
|
|
my ( $self, $col ) = @_; # para de esquecer a colocar @_ quando unpacking!!! |
227
|
0
|
|
|
|
|
|
my $table_a_ref = $self->{table}; |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
#y basically the NW way |
230
|
0
|
|
|
|
|
|
my $col_sum = sum map { $table_a_ref->[$_][$col]{observation} } ( 0..($self->{properties}{rows}-1) ); |
|
0
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
#y my way |
232
|
0
|
|
|
|
|
|
my $col_sum2 = sum map { $_->[$col]{observation} } @{$self->{table}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
|
234
|
0
|
|
|
|
|
|
return $col_sum; |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub _sum_cols { |
239
|
0
|
|
|
0
|
|
|
my $self = shift; |
240
|
0
|
|
|
|
|
|
my $table_a_ref = $self->{table}; |
241
|
0
|
|
|
|
|
|
my @col_sums = (); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#/ still need to fix this!!! use $self-{properties}{col}-1?!? - that way its all the same figure |
244
|
0
|
|
|
|
|
|
for my $col (0..$#{$table_a_ref->[0]}) { |
|
0
|
|
|
|
|
|
|
245
|
0
|
|
|
|
|
|
push @col_sums, $self->_sum_a_col($col); |
246
|
|
|
|
|
|
|
} |
247
|
0
|
|
|
|
|
|
$self->{properties}{col_sums} = [@col_sums]; |
248
|
|
|
|
|
|
|
|
249
|
0
|
|
|
|
|
|
return; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
sub _total { |
253
|
0
|
|
|
0
|
|
|
my $self = shift; |
254
|
0
|
|
|
|
|
|
my $total_from_rows = sum @{$self->{properties}{row_sums}}; |
|
0
|
|
|
|
|
|
|
255
|
0
|
|
|
|
|
|
my $total_from_cols = sum @{$self->{properties}{col_sums}}; |
|
0
|
|
|
|
|
|
|
256
|
0
|
|
|
|
|
|
$self->{properties}{total} = $total_from_rows; |
257
|
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
return; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub _calculate_expected { |
262
|
0
|
|
|
0
|
|
|
my $self = shift; |
263
|
0
|
|
|
|
|
|
my $a_ref = $self->{table}; |
264
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
|
my @row_sums = @{$self->{properties}{row_sums}}; |
|
0
|
|
|
|
|
|
|
266
|
0
|
|
|
|
|
|
my @col_sums = @{$self->{properties}{col_sums}}; |
|
0
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
my $total = $self->{properties}{total}; |
268
|
|
|
|
|
|
|
|
269
|
0
|
|
|
|
|
|
for my $row ( 0..($self->{properties}{rows}-1) ) { |
270
|
|
|
|
|
|
|
|
271
|
0
|
|
|
|
|
|
for my $col (0..($self->{properties}{cols}-1) ) { |
272
|
|
|
|
|
|
|
|
273
|
0
|
|
|
|
|
|
my $expected = ( $row_sums[$row] * $col_sums[$col] ) / $total; |
274
|
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
|
$a_ref->[$row][$col]{expected} = $expected; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
0
|
|
|
|
|
|
$self->{properties}{analysis}{expect_table} = 1; |
280
|
0
|
|
|
|
|
|
return; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub _calculate_f_ln_f { |
284
|
0
|
|
|
0
|
|
|
my $self = shift; |
285
|
0
|
|
|
|
|
|
my $a_ref = $self->{table}; |
286
|
0
|
|
|
|
|
|
for my $row ( 0..($self->{properties}{rows}-1) ) { |
287
|
0
|
|
|
|
|
|
for my $col (0..($self->{properties}{cols}-1) ) { |
288
|
|
|
|
|
|
|
|
289
|
0
|
|
|
|
|
|
my $observed = $a_ref->[$row][$col]{observation}; |
290
|
0
|
|
|
|
|
|
my $f_ln_f = _f_ln_f ($observed); |
291
|
0
|
|
|
|
|
|
$a_ref->[$row][$col]{f_ln_f} = $f_ln_f; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
0
|
|
|
|
|
|
return; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
#/ beware: this is a prototype and not for use as an object method |
299
|
|
|
|
|
|
|
sub _f_ln_f($) { |
300
|
0
|
|
|
0
|
|
|
my $value = shift; |
301
|
0
|
0
|
|
|
|
|
$value = $value > 0.5 ? $value * ( log ( $value ) ) : 0 ; |
302
|
0
|
|
|
|
|
|
return $value; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub _calculate_G_traditional { |
306
|
0
|
|
|
0
|
|
|
my $self = shift; |
307
|
0
|
|
|
|
|
|
my $table_a_ref = $self->{table}; |
308
|
|
|
|
|
|
|
|
309
|
0
|
|
|
|
|
|
my $sum = 0; |
310
|
|
|
|
|
|
|
|
311
|
0
|
|
|
|
|
|
for my $row ( 0..($self->{properties}{rows}-1) ) { |
312
|
0
|
|
|
|
|
|
for my $col (0..($self->{properties}{cols}-1) ) { |
313
|
|
|
|
|
|
|
|
314
|
0
|
|
|
|
|
|
my $cell = $table_a_ref->[$row][$col]{observation} * |
315
|
|
|
|
|
|
|
( log ( $table_a_ref->[$row][$col]{observation} / $table_a_ref->[$row][$col]{expected} ) ); |
316
|
|
|
|
|
|
|
|
317
|
0
|
|
|
|
|
|
$sum += $cell; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
0
|
|
|
|
|
|
my $G = 2 * $sum; |
321
|
|
|
|
|
|
|
|
322
|
0
|
|
|
|
|
|
$self->{properties}{G} = $G; |
323
|
|
|
|
|
|
|
|
324
|
0
|
|
|
|
|
|
return; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
sub _calculate_G_alternative { |
328
|
0
|
|
|
0
|
|
|
my $self = shift; |
329
|
0
|
|
|
|
|
|
my $table_a_ref = $self->{table}; |
330
|
0
|
|
|
|
|
|
my $sum = 0; |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
#/ don´t need this part################################################### |
333
|
0
|
|
|
|
|
|
my @f_ln_f_for_row_sums = map { _f_ln_f($_) } @{$self->{properties}{row_sums}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
334
|
0
|
|
|
|
|
|
my @f_ln_f_for_col_sums = map { _f_ln_f($_) } @{$self->{properties}{col_sums}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
335
|
0
|
|
|
|
|
|
$self->{properties}{f_ln_f_for_row_sums} = [@f_ln_f_for_row_sums]; |
336
|
0
|
|
|
|
|
|
$self->{properties}{f_ln_f_for_col_sums} = [@f_ln_f_for_col_sums]; |
337
|
|
|
|
|
|
|
########################################################################## |
338
|
|
|
|
|
|
|
|
339
|
0
|
|
|
|
|
|
my $sum_of_f_ln_f_for_row_sums = sum map { _f_ln_f($_) } @{$self->{properties}{row_sums}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
340
|
0
|
|
|
|
|
|
my $sum_of_f_ln_f_for_col_sums = sum map { _f_ln_f($_) } @{$self->{properties}{col_sums}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
|
342
|
0
|
|
|
|
|
|
my $f_ln_f_for_total = _f_ln_f ($self->{properties}{total}); |
343
|
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
|
$self->{properties}{f_ln_f_for_total} = [$f_ln_f_for_total]; |
345
|
|
|
|
|
|
|
|
346
|
0
|
|
|
|
|
|
my $sum_f_ln_f = 0; |
347
|
|
|
|
|
|
|
|
348
|
0
|
|
|
|
|
|
for my $row ( 0..($self->{properties}{rows}-1) ) { |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
#/ either |
351
|
|
|
|
|
|
|
#for my $col (0..($self->{properties}{cols}-1) ) { |
352
|
|
|
|
|
|
|
#$row = $table_a_ref->[$row][$col]{observation}; |
353
|
|
|
|
|
|
|
#/ or |
354
|
0
|
|
|
|
|
|
$row = sum map { $_->{f_ln_f} } @{$self->{table}[$row]}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
|
356
|
0
|
|
|
|
|
|
$sum_f_ln_f += $row; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
0
|
|
|
|
|
|
my $G = 2 * ( ( $sum_f_ln_f + $f_ln_f_for_total ) - |
360
|
|
|
|
|
|
|
( $sum_of_f_ln_f_for_row_sums + $sum_of_f_ln_f_for_col_sums ) ); |
361
|
|
|
|
|
|
|
|
362
|
0
|
|
|
|
|
|
$self->{properties}{G} = $G; |
363
|
|
|
|
|
|
|
|
364
|
0
|
|
|
|
|
|
return; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
sub G { |
368
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
369
|
|
|
|
|
|
|
#y check data loaded flag |
370
|
0
|
0
|
|
|
|
|
croak qq{\nYou have to load some data before calling this method} if ( !exists $self->{properties}{analysis}{obs_table} ); |
371
|
0
|
|
|
|
|
|
$self->_sum_rows(); |
372
|
0
|
|
|
|
|
|
$self->_sum_cols(); |
373
|
0
|
|
|
|
|
|
$self->_total(); |
374
|
0
|
|
|
|
|
|
$self->_calculate_expected(); |
375
|
0
|
|
|
|
|
|
$self->_calculate_G_traditional(); |
376
|
|
|
|
|
|
|
|
377
|
0
|
|
|
|
|
|
$self->_df(); |
378
|
0
|
|
|
|
|
|
$self->_calculate_p_value(); |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
#y flag to check this has run |
381
|
0
|
|
|
|
|
|
$self->{properties}{analysis}{G} = 1; |
382
|
0
|
|
|
|
|
|
return; |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
=head2 G |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
To calculate G value. This method implements the calculation described in http://en.wikipedia.org/wiki/G-test. |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
$gtest->G(); |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=cut |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
#/ make this redundant and using self->{prop}{G} for both G calculation methods - create a method to compare the two computed values. |
394
|
|
|
|
|
|
|
sub G_alt { |
395
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
396
|
|
|
|
|
|
|
#y check data loaded flag |
397
|
0
|
0
|
|
|
|
|
croak qq{\nYou have to load some data before calling this method} if ( !exists $self->{properties}{analysis}{obs_table} ); |
398
|
0
|
|
|
|
|
|
$self->_sum_rows(); |
399
|
0
|
|
|
|
|
|
$self->_sum_cols(); |
400
|
0
|
|
|
|
|
|
$self->_total(); |
401
|
0
|
|
|
|
|
|
$self->_calculate_f_ln_f(); |
402
|
0
|
|
|
|
|
|
$self->_calculate_G_alternative(); |
403
|
|
|
|
|
|
|
|
404
|
0
|
|
|
|
|
|
$self->_df(); |
405
|
0
|
|
|
|
|
|
$self->_calculate_p_value(); |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
#y flag to check this has run |
408
|
0
|
|
|
|
|
|
$self->{properties}{analysis}{G_alt} = 1; |
409
|
0
|
|
|
|
|
|
return; |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
=head2 G_alt |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
To calculate G you may also use this method. This method implements procedure described in |
415
|
|
|
|
|
|
|
http://udel.edu/~mcdonald/statgtestind.html. This approach does not directly generate a table of expected values. |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
$gtest->G_alt(); |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=cut |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
sub _df { |
422
|
0
|
|
|
0
|
|
|
my $self = shift; |
423
|
0
|
|
|
|
|
|
my $df = ( $self->{properties}{rows}-1 ) * ( $self->{properties}{cols}-1 ); |
424
|
0
|
|
|
|
|
|
$self->{properties}{df} = $df; |
425
|
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
|
return; |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
sub _calculate_p_value { |
430
|
0
|
|
|
0
|
|
|
my $self = shift; |
431
|
0
|
|
|
|
|
|
my $chisprob = chisqrprob ($self->{properties}{df}, $self->{properties}{G}); |
432
|
0
|
|
|
|
|
|
$self->{properties}{p_value} = $chisprob; |
433
|
|
|
|
|
|
|
|
434
|
0
|
|
|
|
|
|
return; |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
sub _configure_table { |
438
|
0
|
|
|
0
|
|
|
my ( $self, $which ) = @_; |
439
|
0
|
|
|
|
|
|
my $max_len = 0; |
440
|
0
|
|
|
|
|
|
for my $row (@{$self->{table}}) { |
|
0
|
|
|
|
|
|
|
441
|
0
|
|
|
|
|
|
my $temp = max map { length( sprintf ( q{%.0f}, $_->{$which}) ) } @{$row} ; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
442
|
0
|
0
|
|
|
|
|
$max_len = $temp > $max_len ? $temp : $max_len; |
443
|
|
|
|
|
|
|
} |
444
|
0
|
|
|
|
|
|
return ($max_len) x $self->{properties}{cols}; |
445
|
|
|
|
|
|
|
} |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub _print_table { |
448
|
0
|
|
|
0
|
|
|
my ( $self, $which ) = @_; |
449
|
|
|
|
|
|
|
|
450
|
0
|
0
|
0
|
|
|
|
( $which eq q{observation} || $which eq q{expected} ) || |
451
|
|
|
|
|
|
|
croak qq{\nUsage is \$object->print_observed or \$object->print_expected}; |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
#croak qq{\nUsage is \$object->print_observed or \$object->print_expected} |
454
|
|
|
|
|
|
|
#unless ( ( $which eq q{observed} ) || ( $which eq q{expected} ) ); |
455
|
|
|
|
|
|
|
#if !( ( $which eq q{observed} ) || ( $which eq q{expected} ) ); |
456
|
|
|
|
|
|
|
|
457
|
0
|
|
|
|
|
|
my $table = Text::SimpleTable->new($self->_configure_table($which)); |
458
|
0
|
|
|
|
|
|
my $count = 0; |
459
|
0
|
|
|
|
|
|
for my $row (@{$self->{table}}) { |
|
0
|
|
|
|
|
|
|
460
|
0
|
0
|
|
|
|
|
$table->hr if ( $count != 0 ); |
461
|
0
|
|
|
|
|
|
$table->row( ( map { sprintf ( q{%.0f}, $_->{$which} ) } @{$row} ) ); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
462
|
0
|
|
|
|
|
|
$count++; |
463
|
|
|
|
|
|
|
} |
464
|
0
|
|
|
|
|
|
print qq{\nTable of $which values:\n}, $table->draw; |
465
|
|
|
|
|
|
|
|
466
|
0
|
|
|
|
|
|
return; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub print_expected { |
470
|
|
|
|
|
|
|
# convinience method - stops calling of private _print_table |
471
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
#y check data loaded flag |
474
|
0
|
0
|
|
|
|
|
croak qq{\nYou have to load some data before calling this method} if ( !exists $self->{properties}{analysis}{obs_table} ); |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
#y check whether _calculated_expected was called |
477
|
0
|
0
|
0
|
|
|
|
if ( ( exists $self->{properties}{analysis}{G_alt} ) && ( !exists $self->{properties}{analysis}{expect_table} ) ) { |
478
|
0
|
|
|
|
|
|
print qq{\nIt appears you ran the alternative procedure that does not directly generate an expected values table. } |
479
|
|
|
|
|
|
|
.qq{\nGenerating expected values table now.\n}; |
480
|
0
|
|
|
|
|
|
$self->_calculate_expected(); |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
#y check the analysis flags |
484
|
|
|
|
|
|
|
#y !$x and !$y and croak |
485
|
0
|
0
|
0
|
|
|
|
croak qq{\nYou must run the analysis with either method G or G_alt first} |
486
|
|
|
|
|
|
|
if ( ( !exists $self->{properties}{analysis}{G} ) && ( !exists $self->{properties}{analysis}{G_alt} ) ); |
487
|
|
|
|
|
|
|
|
488
|
0
|
|
|
|
|
|
$self->_print_table(q{expected}); |
489
|
0
|
|
|
|
|
|
return; |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
=head2 print_expected |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Prints a table of the calculated expected values to STDOUT. If you used G_alt to calculate G it will first generated the |
494
|
|
|
|
|
|
|
table of excpeted values. |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
$gtest->print_expected(); |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=cut |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
sub print_observed { |
501
|
|
|
|
|
|
|
# convinience method - stops calling of private _print_table |
502
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
503
|
|
|
|
|
|
|
#y check data loaded flag |
504
|
0
|
0
|
|
|
|
|
croak qq{\nYou have to load some data before calling this method} if ( !exists $self->{properties}{analysis}{obs_table} ); |
505
|
0
|
|
|
|
|
|
$self->_print_table(q{observation}); |
506
|
0
|
|
|
|
|
|
return; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
=head2 print_observed |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Prints a table of the observation values to STDOUT. |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
$gtest->print_observed(); |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=cut |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
sub results { |
517
|
0
|
|
|
0
|
0
|
|
my ($self, $thing ) = @_; |
518
|
|
|
|
|
|
|
#y check the analysis flags |
519
|
0
|
0
|
0
|
|
|
|
croak qq{\nYou must run the analysis with either method G or G_alt first} |
520
|
|
|
|
|
|
|
if ( ( !exists $self->{properties}{analysis}{G} ) && ( !exists $self->{properties}{analysis}{G_alt} ) ); |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
#y preferentially grab the traditional method result |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
#my $G = ( !exists $self->{properties}{G} ) ? $self->{properties}{G} : $self->{properties}{G_alt}; |
525
|
|
|
|
|
|
|
#/ twat! |
526
|
|
|
|
|
|
|
#my $G = exists $self->{properties}{G}; |
527
|
0
|
|
|
|
|
|
my $G = $self->{properties}{G}; |
528
|
|
|
|
|
|
|
|
529
|
0
|
|
|
|
|
|
my $df = $self->{properties}{df}; |
530
|
0
|
|
|
|
|
|
my $p_val = $self->{properties}{p_value}; |
531
|
|
|
|
|
|
|
|
532
|
0
|
0
|
|
|
|
|
if ( $p_val < 1e-05 ) { $p_val = sprintf (q{%e}, $p_val) } |
|
0
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
|
534
|
0
|
|
|
|
|
|
$G = sprintf ( q{%.5f}, $G ); |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
return ( |
537
|
0
|
|
|
0
|
|
|
VOID { $self->_void ($G, $df, $p_val) } |
538
|
0
|
|
|
0
|
|
|
LIST { ($G, $df, $p_val ) } |
539
|
0
|
|
|
0
|
|
|
BOOL { $self->_boolean($G, $df, $thing) } |
540
|
0
|
|
|
0
|
|
|
NUM { $G ; } |
541
|
0
|
|
|
|
|
|
); |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
=head2 results |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
Used to access the results of the G-test calculation. This method is context-dependent and will return a variety of |
547
|
|
|
|
|
|
|
different values depending on its calling context. In VOID context it simply prints the calculated value of G, df and |
548
|
|
|
|
|
|
|
the p_value in a table to STDOUT. |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
$gtest->results(); |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
In BOOLEAN context it requires you to pass it a value for the significance level of the test you wish to apply e.g. |
553
|
|
|
|
|
|
|
0.05. It returns True or False depending on whether the null hypothesis is rejected at that significance level. |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
# test if the result is significant at the p = 0.05 level. |
556
|
|
|
|
|
|
|
if ($gtest->results( 0.05 )) { print qq{\nthis is significant } } else { print qq{\nthis is not significant} } |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
In LIST context it simply returns a LIST of the calculated values of G, df and p for the observation data. |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
my ($G, $df, $p) = $gtest->results(); |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
In NUMERIC context it returns the calculated value of G. |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
print qq{\n\nG in numeric is: }, 0+$gtest->results(); |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=cut |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
sub _void { |
569
|
0
|
|
|
0
|
|
|
my ( $self, $G, $df, $p_val ) = @_; |
570
|
|
|
|
|
|
|
|
571
|
0
|
0
|
|
|
|
|
my $g_len = length ($G) > 7 ? length ($G) : 7; |
572
|
0
|
0
|
|
|
|
|
my $df_len = length ($df) > 3 ? length ($df) : 3; |
573
|
0
|
0
|
|
|
|
|
my $p_len = length ($p_val) > 7 ? length ($p_val) : 7; |
574
|
|
|
|
|
|
|
|
575
|
0
|
|
|
|
|
|
my $table = Text::SimpleTable->new($g_len, $df_len, $p_len); |
576
|
0
|
|
|
|
|
|
$table->row( qw/ G_value df p_value / ); |
577
|
0
|
|
|
|
|
|
$table->hr; |
578
|
0
|
|
|
|
|
|
$table->row( $G, $df, $p_val ); |
579
|
0
|
|
|
|
|
|
print qq{\nTable of results:\n}, $table->draw; |
580
|
|
|
|
|
|
|
|
581
|
0
|
|
|
|
|
|
return; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
sub _boolean { |
585
|
0
|
|
|
0
|
|
|
my ($self, $G, $df, $sig) = @_; |
586
|
|
|
|
|
|
|
|
587
|
0
|
0
|
|
|
|
|
$sig or croak qq{\nYou must pass a p_value in BOOLEAN context}; |
588
|
|
|
|
|
|
|
|
589
|
0
|
|
|
|
|
|
print qq{\nsig $sig}; |
590
|
|
|
|
|
|
|
|
591
|
0
|
0
|
0
|
|
|
|
croak qq{\nThe p value must be numeric and in the range > 0 and < 1.} if ( $sig !~ /\A \d* \.? \d+ ([eE][+-]?\d+)? \z/xms || $sig <= 0 || $sig >= 1) ; |
|
|
|
0
|
|
|
|
|
592
|
|
|
|
|
|
|
#/ forgot to check for exponential numbers |
593
|
|
|
|
|
|
|
#if ( $sig !~ /\A[01]?\.\d{1,7}\z/xms || $sig <= 0 || $sig >= 1) ; |
594
|
|
|
|
|
|
|
|
595
|
0
|
0
|
|
|
|
|
$G = $G > chisqrdistr ( $df, $sig ) ? 1 : undef; |
596
|
0
|
|
|
|
|
|
return $G; |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
__END__ |