line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::MVA::HotellingTwoSample; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
26219
|
use base Statistics::MVA; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1045
|
|
4
|
1
|
|
|
1
|
|
131028
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
36
|
|
5
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
36
|
|
6
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
81
|
|
7
|
1
|
|
|
1
|
|
8103
|
use Statistics::Distributions qw/fprob/; |
|
1
|
|
|
|
|
9621
|
|
|
1
|
|
|
|
|
121
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Statistics::MVA::HotellingTwoSample - Two-Sample Hotelling's T-Square Test Statistic. |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=cut |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 VERSION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
This document describes Statistics::MVA::HotellingTwoSample version 0.0.2 |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Statistics::MVA::HotellingTwoSample; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# we have two groups of data each with 4 variables and 9 observations. |
26
|
|
|
|
|
|
|
my $data_x = [ |
27
|
|
|
|
|
|
|
[qw/ 292 222 52 57/], |
28
|
|
|
|
|
|
|
[qw/ 100 227 51 45/], |
29
|
|
|
|
|
|
|
[qw/ 272 218 49 36/], |
30
|
|
|
|
|
|
|
[qw/ 101 221 17 47/], |
31
|
|
|
|
|
|
|
[qw/ 181 208 12 35/], |
32
|
|
|
|
|
|
|
[qw/ 111 118 51 54/], |
33
|
|
|
|
|
|
|
[qw/ 288 321 51 49/], |
34
|
|
|
|
|
|
|
[qw/ 286 219 52 45/], |
35
|
|
|
|
|
|
|
[qw/ 262 225 47 44/], |
36
|
|
|
|
|
|
|
]; |
37
|
|
|
|
|
|
|
my $data_y = [ |
38
|
|
|
|
|
|
|
[qw/ 286 107 29 62/], |
39
|
|
|
|
|
|
|
[qw/ 311 122 29 63/], |
40
|
|
|
|
|
|
|
[qw/ 272 131 52 86/], |
41
|
|
|
|
|
|
|
[qw/ 182 88 23 69/], |
42
|
|
|
|
|
|
|
[qw/ 211 118 61 57/], |
43
|
|
|
|
|
|
|
[qw/ 323 127 51 79/], |
44
|
|
|
|
|
|
|
[qw/ 385 332 70 63/], |
45
|
|
|
|
|
|
|
[qw/ 373 127 85 60/], |
46
|
|
|
|
|
|
|
[qw/ 408 95 57 71/], |
47
|
|
|
|
|
|
|
]; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# Create a Statistics::MVA::HotellingTwoSample object and pass the data as two Lists-of-Lists within an anonymous array. |
50
|
|
|
|
|
|
|
my $mva = Statistics::MVA::HotellingTwoSample->new([ $data_x, $data_y ]); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Generate results and print a report to STDOUT by calling hotelling_two_sample in void context. |
53
|
|
|
|
|
|
|
$mva->hotelling_two_sample; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Call hotelling_two_sample in LIST-context to access the results directly. |
56
|
|
|
|
|
|
|
my ($T2, $F, $pval, $df1, $df2) = $mva->hotelling_two_sample; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 DESCRIPTION |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Hotelling's T-square statistics is a generalisation of Student's t statistic that is used for multivariate hypothesis |
63
|
|
|
|
|
|
|
testing. See http://en.wikipedia.org/wiki/Hotelling%27s_T-square_distribution. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=cut |
66
|
|
|
|
|
|
|
|
67
|
1
|
|
|
1
|
|
15
|
use version; our $VERSION = qv('0.0.2'); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
14
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub hotelling_two_sample { |
70
|
|
|
|
|
|
|
|
71
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
72
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
my $k = $self->[1]; |
74
|
0
|
0
|
|
|
|
|
croak qq{\nThis is a two-sample test - you must give two samples} if $k != 2; |
75
|
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
my $n_x = $self->[0][0][1]; |
77
|
0
|
|
|
|
|
|
my $n_y = $self->[0][1][1]; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
#y just variable number - again will need to check its equal for sample 2 - Statistics::MVA already checked p is same for all matrices... |
81
|
0
|
|
|
|
|
|
my $p = $self->[2]; |
82
|
|
|
|
|
|
|
#print qq{\ntesting $n_x, $n_y and $p}; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
#y Just averages of for each variable - naughty changed interface to MVA |
85
|
|
|
|
|
|
|
#my @bar_x = &_bar($self->[3][0]); |
86
|
|
|
|
|
|
|
#my @bar_y = &_bar($self->[3][1]); |
87
|
0
|
|
|
|
|
|
my @bar_x = &_bar($self->[0][0][2]); |
88
|
0
|
|
|
|
|
|
my @bar_y = &_bar($self->[0][1][2]); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#y covariance matrices - this is already done!!! as part of MVA object creation |
91
|
0
|
|
|
|
|
|
my $V_x = $self->[0][0][0]; |
92
|
0
|
|
|
|
|
|
my $V_y = $self->[0][1][0]; |
93
|
|
|
|
|
|
|
|
94
|
0
|
|
|
|
|
|
my $V_x_adj = $V_x->shadow; |
95
|
0
|
|
|
|
|
|
my $V_y_adj = $V_y->shadow; |
96
|
0
|
|
|
|
|
|
my $V_p = $V_x->shadow; |
97
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
|
$V_x_adj->multiply_scalar($V_x,$n_x-1); |
99
|
0
|
|
|
|
|
|
$V_y_adj->multiply_scalar($V_y,$n_y-1); |
100
|
0
|
|
|
|
|
|
$V_p->add($V_x_adj,$V_y_adj); |
101
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
my $divisor = 1 / ($n_x + $n_y - 2); |
103
|
0
|
|
|
|
|
|
$V_p->multiply_scalar($V_p,$divisor); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
#y subtract means... |
106
|
0
|
|
|
|
|
|
my @bar_diff = map { $bar_x[$_] - $bar_y[$_] } (0..$#bar_x); |
|
0
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
|
my $bar_diff_mat = Math::MatrixReal->new_from_cols([[@bar_diff]]); |
109
|
|
|
|
|
|
|
|
110
|
0
|
|
|
|
|
|
my $dim; |
111
|
|
|
|
|
|
|
my $x; |
112
|
0
|
|
|
|
|
|
my $B_matreal; |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
#/ using matrixreal - solve equation 'a %*% x = b' for 'x', - 'b' can be either a vector or a matrix. |
115
|
0
|
|
|
|
|
|
my $LR = $V_p->decompose_LR(); |
116
|
0
|
0
|
|
|
|
|
if (!(($dim,$x,$B_matreal) = $LR->solve_LR($bar_diff_mat))) { croak qq{\nsystem has no solution} } |
|
0
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
#print qq{\n\nhere is the matrixreal solution\n}, $x; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
#/ using Cephes |
120
|
|
|
|
|
|
|
#my $M = Math::Cephes::Matrix->new($V_p->[0]); |
121
|
|
|
|
|
|
|
#my $B = [@bar_diff]; |
122
|
|
|
|
|
|
|
#my $solution = $M->simq($B); |
123
|
|
|
|
|
|
|
#print qq{\nhere is the cephes solution @{$solution}\n}; |
124
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
my $crossprod = ~$bar_diff_mat * $x; |
126
|
0
|
|
|
|
|
|
my $df1 = $p; |
127
|
0
|
|
|
|
|
|
my $df2 = $n_x + $n_y-$p-1; |
128
|
0
|
|
|
|
|
|
my $other = $n_x + $n_y - 2; |
129
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
my $T2 = ($crossprod->[0][0][0] * $df2 * (($n_x * $n_y)/($n_x+$n_y)) * $other * $p) / (($n_x+$n_y-2) * $p * $df2); |
131
|
0
|
|
|
|
|
|
my $F = ( $df2 * $T2 ) / ($other * $df1); |
132
|
0
|
|
|
|
|
|
my $pval = &fprob($df1,$df2,$F); |
133
|
0
|
0
|
|
|
|
|
$pval = $pval < 1e-8 ? q{< 1e-8} : $pval; |
134
|
|
|
|
|
|
|
|
135
|
0
|
0
|
|
|
|
|
if ( !wantarray ) { print qq{\nT^2 = $T2\nF = $F \ndf1 = $df1\ndf2 = $df2 \np.value = $pval}; } |
|
0
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
|
else { return ( $T2, $F, $pval, $df1, $df2 ) } |
137
|
0
|
|
|
|
|
|
return; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub _bar { |
141
|
0
|
|
|
0
|
|
|
my $lol = shift; |
142
|
0
|
|
|
|
|
|
my $rows = scalar @{$lol}; |
|
0
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
|
my $cols = scalar @{$lol->[0]}; |
|
0
|
|
|
|
|
|
|
144
|
0
|
|
|
|
|
|
my @bar; |
145
|
0
|
|
|
|
|
|
for my $col (0..$cols-1) { |
146
|
0
|
|
|
|
|
|
my $sum = 0; |
147
|
0
|
|
|
|
|
|
for my $row (0..$rows-1) { |
148
|
0
|
|
|
|
|
|
$sum += $lol->[$row][$col]; |
149
|
|
|
|
|
|
|
} |
150
|
0
|
|
|
|
|
|
push @bar, $sum/$rows; |
151
|
|
|
|
|
|
|
} |
152
|
0
|
|
|
|
|
|
$, = q{, }; |
153
|
0
|
|
|
|
|
|
return @bar; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
__END__ |