File Coverage

blib/lib/Statistics/MVA.pm
Criterion Covered Total %
statement 27 144 18.7
branch 0 34 0.0
condition 0 12 0.0
subroutine 9 20 45.0
pod 0 1 0.0
total 36 211 17.0


line stmt bran cond sub pod time code
1             package Statistics::MVA;
2 1     1   46707 use strict;
  1         3  
  1         43  
3 1     1   5 use warnings;
  1         1  
  1         28  
4 1     1   5 use Carp;
  1         5  
  1         96  
5 1     1   2142 use Math::MatrixReal;
  1         145174  
  1         553  
6              
7             #r/ while this module is intended to be the base module and dependency for - and eventually... YOU CAN USE IT TO GENERATE GROUPS OF CV_MATRICES
8             #y standardise - i.e. make sig=1 and mu=0?!? while div - these are more for factor and PCA... so are largely irrelevant
9              
10             #/ need to confirm these are defaults: {standardise => 0, divisor => 1}
11              
12 1     1   2337 use version; our $VERSION = qv('0.0.2');
  1         8873  
  1         10  
13              
14             =head1 NAME
15              
16             Statistics::MVA - Base module/Dependency for other modules in Statistics::MVA namespace.
17              
18             =cut
19             =head1 VERSION
20              
21             This document describes Statistics::MVA version 0.0.2
22              
23             =cut
24              
25             #/ perhaps add something about generating CV_matrices using Statistics::MVA::CVMat and groups of them with Statistics::MVA
26              
27             =head1 DESCRIPTION
28              
29             This module is a base module for the other modules in the Statistics::MVA namespace (e.g. Statistics::MVA::Bartlett,
30             Statistics::MVA::Hotelling etc.). It is not intended for direct use - though it may be used for generating covariance matrices directly.
31              
32             This set of modules is still very much in development. Please let me know if you find any bugs.
33              
34             The constructor accepts an array containing a series of List-of-Lists (LoL) references and returns an object of the form
35             (modified output from Data::TreeDraw):
36            
37             ARRAY REFERENCE (0)
38             |
39             |__ARRAY REFERENCE (1) [ '->[0]' ]
40             | |
41             | |__ARRAY REFERENCE (2) [ '->[0][0]' ]
42             | | |
43             | | |__BLESSED OBJECT BELONGING TO CLASS: Math::MatrixReal (3) [ '->[0][0][0]' ]
44             | | | MatrixReal object containing covariance matrix for first LoL passed.
45             | | |
46             | | |__SCALAR = '7' (3) [ '->[0][0][1]' ]
47             | | | p for first LoL.
48             | | |
49             | | |__ARRAY REFERENCE (3) [ '->[0][0][2]' ]
50             | | LoL of the raw data passed.
51             | |
52             | Continues for all other LoLs refs passed.
53             |
54             |__SCALAR = '3' (1) [ '->[1]' ]
55             | k.
56             |
57             |__SCALAR = '3' (1) [ '->[2]' ]
58             | Overall p - i.e. only allows completes if all individual p´s are equal.
59             |
60             |__SCALAR = '0' (1) [ '->[3]' ]
61             | Value of standardise option.
62             |
63             |__SCALAR = '1' (1) [ '->[4]' ]
64             Value of divisor option.
65              
66             =cut
67              
68             sub new {
69 0     0 0   my ($class, $groups, $options) = @_;
70 0           my $k = scalar @{$groups};
  0            
71 0 0         croak qq{\nThere must be more than one group} if ($k < 2);
72 0 0 0       croak qq{\nArguments must be passed as HASH reference.} if ( ( $options ) && ( ref $options ne q{HASH} ) );
73              
74 0           my ($s_ref, $p, $stand, $div) = &_cv_matrices($groups, $k, $options);
75            
76             #y feed object - still need data, but this is messy
77 0           my $self = [$s_ref, $k, $p, $stand, $div];
78             #my $self = [$s_ref, $k, $p, $groups];
79              
80 0           bless $self, $class;
81 0           return $self;
82             }
83              
84             sub _cv_matrices {
85 0     0     my ( $groups, $k, $options ) = @_;
86 0 0 0       my $stand = defined $options && exists $options->{standardise} ? $options->{standardise} : 0;
87 0 0         my $div = exists $options->{divisor} ? $options->{divisor} : 1;
88 0           my @p;
89 0           my $a_ref = [];
90             #for my $i (0..$self->[1]-1) {
91 0           for my $i (0..$k-1) {
92             # this will be combined into single step
93 0           my $mva = Statistics::MVA::CVMat->new($groups->[$i],{standardise => $stand, divisor => $div});
94             #my $mva = Statistics::MVA->new($self->[0][$i],{standardise => 0, divisor => 1});
95 0           my $mva_matrix = my $a = Math::MatrixReal->new_from_rows($mva->[0]);
96 0           push @p, $mva->[1];
97             #y don´t want the adjusted scores anymore
98             #$a_ref->[$i] = [ $mva_matrix, $mva->[2], $mva->[3] ];
99             #$a_ref->[$i] = [ $mva_matrix, $mva->[2] ];
100             #y let´s feed data in too
101 0           $a_ref->[$i] = [ $mva_matrix, $mva->[2], $groups->[$i] ];
102             }
103 0           my $p_check = shift @p;
104             #croak qq{\nAll groups must have the same variable number} if &_p_notall(@p);
105 0 0         croak qq{\nAll groups must have the same variable number} if &_p_notall($p_check, \@p);
106 0           return ($a_ref, $p_check, $stand, $div);
107             }
108              
109             sub _p_notall {
110 0     0     my ($p_check, $p_ref) = @_;
111             #my @p = @_;
112             #my $p_check = shift @p;
113             #for (@p) {
114 0           for (@{$p_ref}) {
  0            
115             # return 0 if $_ != $p_check;
116 0 0         return 1 if $_ != $p_check;
117             }
118 0           return 0;
119             }
120              
121             1;
122              
123             package Statistics::MVA::CVMat;
124 1     1   1949 use strict;
  1         3  
  1         42  
125 1     1   6 use warnings;
  1         2  
  1         38  
126 1     1   4 use Carp;
  1         2  
  1         94  
127 1     1   6 use List::Util qw/sum/;
  1         2  
  1         4083  
128              
129             # covariance matrix or dispersion matrix is a matrix of covariances between elements
130             sub new {
131 0     0     my ( $class, $lol, $h_ref ) = @_;
132              
133 0 0 0       croak qq{\nData must be passed as ARRAY reference.} if ( !$lol || ( ref $lol ne q{ARRAY} ) );
134 0 0 0       croak qq{\nArguments must be passed as HASH reference.} if ( ( $h_ref ) && ( ref $h_ref ne q{HASH} ) );
135 0 0         croak qq{\nAll data must be a matrix of numbers} if &_check($lol);
136            
137 0 0         my $stand = exists $h_ref->{standardise} ? $h_ref->{standardise} : 0;
138 0 0         my $div = exists $h_ref->{divisor} ? $h_ref->{divisor} : 1;
139 0           $lol = &transpose($lol);
140             # need to have adjusted atm
141             # my ( $cv, $p, $n ) = &_cv($lol, $stand, $div);
142             #y not using adjusted anymore here
143 0           my ( $cv, $p, $n ) = &_cv($lol, $stand, $div);
144             #my ( $cv, $p, $n, $adjusted ) = &_cv($lol, $stand, $div);
145             #y not using adjusted anymore here
146 0           my $self = [$cv,$p,$n];
147             # my $self = [$cv,$p,$n, $adjusted];
148 0           bless $self, $class;
149 0           return $self;
150             }
151              
152             sub _check {
153             # we already checked $lol is an array.
154 0     0     my $lol = shift;
155 0           my @lol = @{$lol};
  0            
156 0           my $l_check = shift @lol;
157 0           $l_check = scalar ( @{$l_check} );
  0            
158 0           for my $r (@lol) {
159 0 0         return 1 if ( scalar ( @{$r} ) != $l_check );
  0            
160 0           for my $cell (@{$r}) {
  0            
161             #/ No need to check that $cells are scalars - i.e. ref \$cell eq q{SCALAR} etc as regexp checks it a number!
162 0 0         return 1 if ( $cell !~ /\A[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?\z/xms );
163             }
164             }
165 0           return 0;
166             }
167              
168             sub transpose {
169 0     0     my $a_ref = shift;
170 0           my $done = [];
171 0           for my $col ( 0..$#{$a_ref->[0]} ) {
  0            
172 0           push @{$done}, [ map { $_->[$col] } @{$a_ref} ];
  0            
  0            
  0            
173             }
174 0           return $done;
175             }
176              
177             sub _cv {
178 0     0     my ( $lol, $stand, $div ) = @_;
179             # only accepts table format
180 0           my ( $averages, $var_num, $var_length) = &_averages($lol);
181 0           my $variances = &_variances ( $lol, $averages, $var_num );
182 0           my $adjusted = &_adjust ($lol, $averages, $variances, $var_num, $stand);
183 0           my $cv_mat = &_CVs($adjusted, $var_num, $var_length, $div);
184             #y not using adjusted anymore here
185 0           return ($cv_mat, $var_num, $var_length);
186             # return ($cv_mat, $var_num, $var_length, $adjusted);
187             }
188              
189             sub _averages {
190 0     0     my $lol = shift;
191 0           my $var_num = scalar ( @{$lol} );
  0            
192 0           my $var_length = scalar ( @{$lol->[0]} );
  0            
193 0           my $totals_ref = [];
194 0           for my $row (0..$var_num-1) {
195 0           my $sum = sum @{$lol->[$row]};
  0            
196 0           my $length = scalar ( @{$lol->[$row]} );
  0            
197 0           my $average = $sum / $length;
198 0           push @{$totals_ref}, { sum => $sum, length => $length, average => $average};
  0            
199             }
200             #$self->{averages} = $totals_ref;
201             #$self->{var_num} = $var_num;
202             #$self->{var_length} = $var_length;
203 0           return ($totals_ref, $var_num, $var_length);
204             }
205              
206             sub _variances {
207 0     0     my ( $data, $avs, $n ) = @_;
208             #my $self = shift;
209             #my $data = $self->{data};
210             #my $avs = $self->{averages};
211 0           my $var = [];
212 0           for my $row ( 0..$n-1 ) {
213 0           my $sum = sum map { ($_ - $avs->[$row]{average})**2 } @{$data->[$row]};
  0            
  0            
214 0           my $length = scalar ( @{$data->[$row]} );
  0            
215 0           my $variance = $sum / $length;
216 0           push @{$var}, $variance;
  0            
217             }
218 0           return $var;
219             }
220              
221             sub _adjust {
222              
223 0     0     my ($trans, $totals, $variances, $n, $stand) = @_;
224 0           my $adjust = [];
225 0 0         croak qq{\nI don\'t recognise that value for the \'standardise\' option - requires \'1\' or \'0\' (defaults to \'0\' without option).}
226             if ( $stand !~ /\A[01]\z/xms );
227            
228             # if ( $stand == 1 ) { for my $row ( 0..$n-1 ) { @{$adjust->[$row]} = map { ( $_ - $totals->[$row]{average}) / sqrt($variances->[$row]) } @{$trans->[$row]}; }
229             # $self->{adjusted} = $adjust; return $adjust; }
230             # elsif ($stand == 0 ) { for my $row ( 0..$n-1 ) { @{$adjust->[$row]} = map { ( $_ - $totals->[$row]{average}) } @{$trans->[$row]}; }
231             # $self->{adjusted} = $adjust; return $adjust }
232              
233 0           for my $row ( 0..$n-1 ) {
234 0 0         my $divisor = $stand == 1 ? sqrt($variances->[$row]) : 1;
235 0           @{$adjust->[$row]} =
  0            
236             #map { ( $_ - $totals->[$row]{average}) / sqrt($variances->[$row]) } @{$trans->[$row]};
237 0           map { ( $_ - $totals->[$row]{average}) / $divisor } @{$trans->[$row]};
  0            
238             }
239 0           return $adjust;
240             }
241              
242             sub _CVs {
243              
244 0     0     my ($adjusted, $var_num, $length, $div) = @_;
245 0 0         croak qq{\nI don\'t recognise that value for the \'divisor\' option - requires \'1\' for n-1 or \'0\' for n (defaults to \'1\').}
246             if ( $div !~ /\A[01]\z/xms );
247 0           my $covariance_matrix_ref = [];
248            
249             #/ this is silly - just have divisor: $divisor = $div == 1 ? $length-1 : $length;
250             # if ( $div == 0 ) { for my $row ( 0..($var_num-1) ) { for my $col ( 0..($var_num-1) ) { my $sum = 0; for my $iteration (0..$#{$adjusted->[0]}) {
251             # my $val = $adjusted->[$col][$iteration] * $adjusted->[$row][$iteration]; $sum += $val; } my $cv = $sum / ($length-1); my $cv = $sum / $length;
252             # $covariance_matrix_ref->[$col][$row] = $cv; } } $self->{covariance_matrix} = $covariance_matrix_ref; return $covariance_matrix_ref; }
253             # elsif ( $div == 1 ) { for my $row ( 0..($var_num-1) ) { for my $col ( 0..($var_num-1) ) { my $sum = 0; for my $iteration (0..$#{$adjusted->[0]}) {
254             # my $val = $adjusted->[$col][$iteration] * $adjusted->[$row][$iteration]; $sum += $val; } my $cv = $sum / ($length-1); my $cv = $sum / $length;
255             # $covariance_matrix_ref->[$col][$row] = $cv; } } $self->{covariance_matrix} = $covariance_matrix_ref; return $covariance_matrix_ref; }
256              
257 0 0         my $divisor = $div == 1 ? $length-1 : $length;
258              
259 0           for my $row ( 0..($var_num-1) ) {
260 0           for my $col ( 0..($var_num-1) ) {
261 0           my $sum = 0;
262 0           for my $iteration (0..$#{$adjusted->[0]}) {
  0            
263 0           my $val = $adjusted->[$col][$iteration] * $adjusted->[$row][$iteration];
264 0           $sum += $val;
265             }
266             #my $cv = $sum / ($length-1);
267 0           my $cv = $sum / $divisor;
268             #my $cv = $sum / $length;
269 0           $covariance_matrix_ref->[$col][$row] = $cv;
270             }
271             }
272             #$self->{covariance_matrix} = $covariance_matrix_ref;
273 0           return $covariance_matrix_ref;
274             }
275              
276             1;
277              
278             __END__