File Coverage

blib/lib/Statistics/MVA/HotellingTwoSample.pm

Criterion	Covered	Total	%
statement	18	69	26.0
branch	0	8	0.0
condition			n/a
subroutine	6	8	75.0
pod	0	1	0.0
total	24	86	27.9

line	stmt	bran	sub	pod	time	code
1						package Statistics::MVA::HotellingTwoSample;
2
3	1		1		26219	use base Statistics::MVA;
	1				3
	1				1045
4	1		1		131028	use warnings;
	1				3
	1				36
5	1		1		5	use strict;
	1				8
	1				36
6	1		1		6	use Carp;
	1				1
	1				81
7	1		1		8103	use Statistics::Distributions qw/fprob/;
	1				9621
	1				121
8
9						=head1 NAME
10
11						Statistics::MVA::HotellingTwoSample - Two-Sample Hotelling's T-Square Test Statistic.
12
13						=cut
14
15						=head1 VERSION
16
17						This document describes Statistics::MVA::HotellingTwoSample version 0.0.2
18
19						=cut
20
21						=head1 SYNOPSIS
22
23						use Statistics::MVA::HotellingTwoSample;
24
25						# we have two groups of data each with 4 variables and 9 observations.
26						my $data_x = [
27						[qw/ 292 222 52 57/],
28						[qw/ 100 227 51 45/],
29						[qw/ 272 218 49 36/],
30						[qw/ 101 221 17 47/],
31						[qw/ 181 208 12 35/],
32						[qw/ 111 118 51 54/],
33						[qw/ 288 321 51 49/],
34						[qw/ 286 219 52 45/],
35						[qw/ 262 225 47 44/],
36						];
37						my $data_y = [
38						[qw/ 286 107 29 62/],
39						[qw/ 311 122 29 63/],
40						[qw/ 272 131 52 86/],
41						[qw/ 182 88 23 69/],
42						[qw/ 211 118 61 57/],
43						[qw/ 323 127 51 79/],
44						[qw/ 385 332 70 63/],
45						[qw/ 373 127 85 60/],
46						[qw/ 408 95 57 71/],
47						];
48
49						# Create a Statistics::MVA::HotellingTwoSample object and pass the data as two Lists-of-Lists within an anonymous array.
50						my $mva = Statistics::MVA::HotellingTwoSample->new([ $data_x, $data_y ]);
51
52						# Generate results and print a report to STDOUT by calling hotelling_two_sample in void context.
53						$mva->hotelling_two_sample;
54
55						# Call hotelling_two_sample in LIST-context to access the results directly.
56						my ($T2, $F, $pval, $df1, $df2) = $mva->hotelling_two_sample;
57
58						=cut
59
60						=head1 DESCRIPTION
61
62						Hotelling's T-square statistics is a generalisation of Student's t statistic that is used for multivariate hypothesis
63						testing. See http://en.wikipedia.org/wiki/Hotelling%27s_T-square_distribution.
64
65						=cut
66
67	1		1		15	use version; our $VERSION = qv('0.0.2');
	1				2
	1				14
68
69						sub hotelling_two_sample {
70
71	0		0	0		my $self = shift;
72
73	0					my $k = $self->[1];
74	0	0				croak qq{\nThis is a two-sample test - you must give two samples} if $k != 2;
75
76	0					my $n_x = $self->[0][0][1];
77	0					my $n_y = $self->[0][1][1];
78
79
80						#y just variable number - again will need to check its equal for sample 2 - Statistics::MVA already checked p is same for all matrices...
81	0					my $p = $self->[2];
82						#print qq{\ntesting $n_x, $n_y and $p};
83
84						#y Just averages of for each variable - naughty changed interface to MVA
85						#my @bar_x = &_bar($self->[3][0]);
86						#my @bar_y = &_bar($self->[3][1]);
87	0					my @bar_x = &_bar($self->[0][0][2]);
88	0					my @bar_y = &_bar($self->[0][1][2]);
89
90						#y covariance matrices - this is already done!!! as part of MVA object creation
91	0					my $V_x = $self->[0][0][0];
92	0					my $V_y = $self->[0][1][0];
93
94	0					my $V_x_adj = $V_x->shadow;
95	0					my $V_y_adj = $V_y->shadow;
96	0					my $V_p = $V_x->shadow;
97
98	0					$V_x_adj->multiply_scalar($V_x,$n_x-1);
99	0					$V_y_adj->multiply_scalar($V_y,$n_y-1);
100	0					$V_p->add($V_x_adj,$V_y_adj);
101
102	0					my $divisor = 1 / ($n_x + $n_y - 2);
103	0					$V_p->multiply_scalar($V_p,$divisor);
104
105						#y subtract means...
106	0					my @bar_diff = map { $bar_x[$_] - $bar_y[$_] } (0..$#bar_x);
	0
107
108	0					my $bar_diff_mat = Math::MatrixReal->new_from_cols([[@bar_diff]]);
109
110	0					my $dim;
111						my $x;
112	0					my $B_matreal;
113
114						#/ using matrixreal - solve equation 'a %*% x = b' for 'x', - 'b' can be either a vector or a matrix.
115	0					my $LR = $V_p->decompose_LR();
116	0	0				if (!(($dim,$x,$B_matreal) = $LR->solve_LR($bar_diff_mat))) { croak qq{\nsystem has no solution} }
	0
117						#print qq{\n\nhere is the matrixreal solution\n}, $x;
118
119						#/ using Cephes
120						#my $M = Math::Cephes::Matrix->new($V_p->[0]);
121						#my $B = [@bar_diff];
122						#my $solution = $M->simq($B);
123						#print qq{\nhere is the cephes solution @{$solution}\n};
124
125	0					my $crossprod = ~$bar_diff_mat * $x;
126	0					my $df1 = $p;
127	0					my $df2 = $n_x + $n_y-$p-1;
128	0					my $other = $n_x + $n_y - 2;
129
130	0					my $T2 = ($crossprod->[0][0][0] * $df2 * (($n_x * $n_y)/($n_x+$n_y)) * $other * $p) / (($n_x+$n_y-2) * $p * $df2);
131	0					my $F = ( $df2 * $T2 ) / ($other * $df1);
132	0					my $pval = &fprob($df1,$df2,$F);
133	0	0				$pval = $pval < 1e-8 ? q{< 1e-8} : $pval;
134
135	0	0				if ( !wantarray ) { print qq{\nT^2 = $T2\nF = $F \ndf1 = $df1\ndf2 = $df2 \np.value = $pval}; }
	0
136	0					else { return ( $T2, $F, $pval, $df1, $df2 ) }
137	0					return;
138						}
139
140						sub _bar {
141	0		0			my $lol = shift;
142	0					my $rows = scalar @{$lol};
	0
143	0					my $cols = scalar @{$lol->[0]};
	0
144	0					my @bar;
145	0					for my $col (0..$cols-1) {
146	0					my $sum = 0;
147	0					for my $row (0..$rows-1) {
148	0					$sum += $lol->[$row][$col];
149						}
150	0					push @bar, $sum/$rows;
151						}
152	0					$, = q{, };
153	0					return @bar;
154						}
155
156						1; # Magic true value required at end of module
157
158						__END__