File Coverage

blib/lib/Statistics/MVA/HotellingTwoSample.pm
Criterion Covered Total %
statement 18 69 26.0
branch 0 8 0.0
condition n/a
subroutine 6 8 75.0
pod 0 1 0.0
total 24 86 27.9


line stmt bran cond sub pod time code
1             package Statistics::MVA::HotellingTwoSample;
2              
3 1     1   26219 use base Statistics::MVA;
  1         3  
  1         1045  
4 1     1   131028 use warnings;
  1         3  
  1         36  
5 1     1   5 use strict;
  1         8  
  1         36  
6 1     1   6 use Carp;
  1         1  
  1         81  
7 1     1   8103 use Statistics::Distributions qw/fprob/;
  1         9621  
  1         121  
8              
9             =head1 NAME
10              
11             Statistics::MVA::HotellingTwoSample - Two-Sample Hotelling's T-Square Test Statistic.
12              
13             =cut
14              
15             =head1 VERSION
16              
17             This document describes Statistics::MVA::HotellingTwoSample version 0.0.2
18              
19             =cut
20              
21             =head1 SYNOPSIS
22              
23             use Statistics::MVA::HotellingTwoSample;
24              
25             # we have two groups of data each with 4 variables and 9 observations.
26             my $data_x = [
27             [qw/ 292 222 52 57/],
28             [qw/ 100 227 51 45/],
29             [qw/ 272 218 49 36/],
30             [qw/ 101 221 17 47/],
31             [qw/ 181 208 12 35/],
32             [qw/ 111 118 51 54/],
33             [qw/ 288 321 51 49/],
34             [qw/ 286 219 52 45/],
35             [qw/ 262 225 47 44/],
36             ];
37             my $data_y = [
38             [qw/ 286 107 29 62/],
39             [qw/ 311 122 29 63/],
40             [qw/ 272 131 52 86/],
41             [qw/ 182 88 23 69/],
42             [qw/ 211 118 61 57/],
43             [qw/ 323 127 51 79/],
44             [qw/ 385 332 70 63/],
45             [qw/ 373 127 85 60/],
46             [qw/ 408 95 57 71/],
47             ];
48            
49             # Create a Statistics::MVA::HotellingTwoSample object and pass the data as two Lists-of-Lists within an anonymous array.
50             my $mva = Statistics::MVA::HotellingTwoSample->new([ $data_x, $data_y ]);
51              
52             # Generate results and print a report to STDOUT by calling hotelling_two_sample in void context.
53             $mva->hotelling_two_sample;
54              
55             # Call hotelling_two_sample in LIST-context to access the results directly.
56             my ($T2, $F, $pval, $df1, $df2) = $mva->hotelling_two_sample;
57              
58             =cut
59              
60             =head1 DESCRIPTION
61              
62             Hotelling's T-square statistics is a generalisation of Student's t statistic that is used for multivariate hypothesis
63             testing. See http://en.wikipedia.org/wiki/Hotelling%27s_T-square_distribution.
64              
65             =cut
66              
67 1     1   15 use version; our $VERSION = qv('0.0.2');
  1         2  
  1         14  
68              
69             sub hotelling_two_sample {
70              
71 0     0 0   my $self = shift;
72              
73 0           my $k = $self->[1];
74 0 0         croak qq{\nThis is a two-sample test - you must give two samples} if $k != 2;
75              
76 0           my $n_x = $self->[0][0][1];
77 0           my $n_y = $self->[0][1][1];
78              
79              
80             #y just variable number - again will need to check its equal for sample 2 - Statistics::MVA already checked p is same for all matrices...
81 0           my $p = $self->[2];
82             #print qq{\ntesting $n_x, $n_y and $p};
83              
84             #y Just averages of for each variable - naughty changed interface to MVA
85             #my @bar_x = &_bar($self->[3][0]);
86             #my @bar_y = &_bar($self->[3][1]);
87 0           my @bar_x = &_bar($self->[0][0][2]);
88 0           my @bar_y = &_bar($self->[0][1][2]);
89              
90             #y covariance matrices - this is already done!!! as part of MVA object creation
91 0           my $V_x = $self->[0][0][0];
92 0           my $V_y = $self->[0][1][0];
93            
94 0           my $V_x_adj = $V_x->shadow;
95 0           my $V_y_adj = $V_y->shadow;
96 0           my $V_p = $V_x->shadow;
97              
98 0           $V_x_adj->multiply_scalar($V_x,$n_x-1);
99 0           $V_y_adj->multiply_scalar($V_y,$n_y-1);
100 0           $V_p->add($V_x_adj,$V_y_adj);
101              
102 0           my $divisor = 1 / ($n_x + $n_y - 2);
103 0           $V_p->multiply_scalar($V_p,$divisor);
104            
105             #y subtract means...
106 0           my @bar_diff = map { $bar_x[$_] - $bar_y[$_] } (0..$#bar_x);
  0            
107              
108 0           my $bar_diff_mat = Math::MatrixReal->new_from_cols([[@bar_diff]]);
109              
110 0           my $dim;
111             my $x;
112 0           my $B_matreal;
113              
114             #/ using matrixreal - solve equation 'a %*% x = b' for 'x', - 'b' can be either a vector or a matrix.
115 0           my $LR = $V_p->decompose_LR();
116 0 0         if (!(($dim,$x,$B_matreal) = $LR->solve_LR($bar_diff_mat))) { croak qq{\nsystem has no solution} }
  0            
117             #print qq{\n\nhere is the matrixreal solution\n}, $x;
118              
119             #/ using Cephes
120             #my $M = Math::Cephes::Matrix->new($V_p->[0]);
121             #my $B = [@bar_diff];
122             #my $solution = $M->simq($B);
123             #print qq{\nhere is the cephes solution @{$solution}\n};
124            
125 0           my $crossprod = ~$bar_diff_mat * $x;
126 0           my $df1 = $p;
127 0           my $df2 = $n_x + $n_y-$p-1;
128 0           my $other = $n_x + $n_y - 2;
129              
130 0           my $T2 = ($crossprod->[0][0][0] * $df2 * (($n_x * $n_y)/($n_x+$n_y)) * $other * $p) / (($n_x+$n_y-2) * $p * $df2);
131 0           my $F = ( $df2 * $T2 ) / ($other * $df1);
132 0           my $pval = &fprob($df1,$df2,$F);
133 0 0         $pval = $pval < 1e-8 ? q{< 1e-8} : $pval;
134            
135 0 0         if ( !wantarray ) { print qq{\nT^2 = $T2\nF = $F \ndf1 = $df1\ndf2 = $df2 \np.value = $pval}; }
  0            
136 0           else { return ( $T2, $F, $pval, $df1, $df2 ) }
137 0           return;
138             }
139              
140             sub _bar {
141 0     0     my $lol = shift;
142 0           my $rows = scalar @{$lol};
  0            
143 0           my $cols = scalar @{$lol->[0]};
  0            
144 0           my @bar;
145 0           for my $col (0..$cols-1) {
146 0           my $sum = 0;
147 0           for my $row (0..$rows-1) {
148 0           $sum += $lol->[$row][$col];
149             }
150 0           push @bar, $sum/$rows;
151             }
152 0           $, = q{, };
153 0           return @bar;
154             }
155              
156             1; # Magic true value required at end of module
157              
158             __END__