File Coverage

blib/lib/Statistics/PCA/Varimax.pm

Criterion	Covered	Total	%
statement	10	12	83.3
branch			n/a
condition			n/a
subroutine	4	4	100.0
pod			n/a
total	14	16	87.5

line	stmt	sub	time	code
1				package Statistics::PCA::Varimax;
2
3	1	1	24695	use warnings;
	1		2
	1		37
4	1	1	5	use strict;
	1		2
	1		34
5	1	1	6	use Carp;
	1		6
	1		103
6	1	1	437	use Math::GSL::Linalg::SVD;
	0
	0
7				use Math::MatrixReal;
8				use List::Util qw(sum);
9
10				=head1 NAME
11
12				Statistics::PCA::Varimax - A Perl implementation of Varimax rotation.
13
14				=cut
15
16				=head1 VERSION
17
18				This document describes Statistics::PCA::Varimax version 0.0.2
19
20
21				=cut
22
23				=head1 SYNOPSIS
24
25				use Statistics::PCA::Varimax;
26
27				# Each nested array ref corresponds to the loadings for a single factor.
28				my $loadings = [
29				[qw/ 0.28681878905 0.69807334810 0.74438876316 0.47052419229 0.68079195447 0.49817011866 0.86049803480 0.64178962603 0.29784558460 /],
30				[qw/ 0.07560334830 0.15335493657 -0.40959477002 0.52231277744 -0.15586396086 -0.49832262559 -0.11502014276 0.32160898539 0.59537280152 /],
31				[qw/ -0.84084848877 -0.08371208961 0.02047721303 -0.13507580587 0.14832508991 0.25345619152 -0.01159349490 -0.04396749541 0.53340721684 /],
32				];
33
34				# Calculate the rotated loadings and orthogonal matrix.
35				my ($rotated_loadings_ref, $orthogonal_matrix_ref) = &rotate($loadings);
36
37				print qq{\nRotated Loadings:\n};
38				for my $c (0..$#{$rotated_loadings_ref->[0]}) { for my $r (0..$#{$rotated_loadings_ref}) {
39				#print qq{$rotated_loadings_ref->[$r][$c], and r: $r and c: $c\t} }; print qq{\n};
40				print qq{$rotated_loadings_ref->[$r][$c]\t} }; print qq{\n};
41				}
42
43				print qq{\nOrthogonal Matrix:\n};
44				for my $r (0..$#{$orthogonal_matrix_ref}) { for my $c (0..$#{$orthogonal_matrix_ref->[$r]}) {
45				print qq{$orthogonal_matrix_ref->[$r][$c]\t} }; print qq{\n};
46				}
47
48				=cut
49
50				=head1 DESCRIPTION
51
52				Varimax rotation is a change of coordinates used in principal component analysis and factor analysis that maximizes the
53				sum of the variances of the squared loadings matrix. This module exports a single routine 'rotate'. This routine is
54				called in LIST context and accepts a LIST-of-LISTS (LoL) corresponding to the loadings matrix of a factor analysis and
55				returns two references to LoLs (NOTE: each nested LIST corresponds to the loadings for a single factor). The first is a
56				LoL of the rotated loadings and the seconds is a LoL of the orthogonal matrix. See http://en.wikipedia.org/wiki/Varimax_rotation.
57
58				=cut
59
60				=head1 DEPENDENCIES
61
62				'Math::GSL::Linalg::SVD' => '0.0.2',
63				'Math::MatrixReal' => '2.05',
64				'List::Util' => '1.22',
65
66				=cut
67
68				=head1 AUTHOR
69
70				Daniel S. T. Hughes C<< >>
71
72				=cut
73
74				use version; our $VERSION = qv('0.0.2');
75
76				require Exporter;
77				our @ISA = qw(Exporter);
78				our @EXPORT = qw(rotate); # symbols to export by default
79
80				sub rotate {
81				my $var = shift;
82
83				croak qq{\nCall me in LIST context} if !wantarray;
84
85				&_data_checks($var);
86
87				# (1a) normalise
88
89				#y calculate - the sqrt of the sum of the squares of each row
90				my $sc_vec = &_calc_sc_vec($var);
91
92				#y divide each entry in each row by the normalising values - not matrix multiplication - i.e. in this case it is 9x3 devided by 1x9 - i.e. could only give 1x3 or 3x1
93
94				my $mat_t = &_transpose($var);
95
96				#/ call with 3rd arg for multiplification
97				#$mat_t = _devide_normalise($mat_t,$sc_vec);
98				$mat_t = &_normalise($mat_t,$sc_vec);
99
100				my $normalised_data = &_transpose($mat_t);
101
102				#y transpose before or after?!?
103				my $norm_mat = Math::MatrixReal->new_from_cols ( $normalised_data );
104
105				# (1b) initialise others - p and nc - i.e. variable and factor number
106
107				my $p_variables = scalar ( @{$mat_t} );
108				my $nc_factors = scalar ( @{$mat_t->[0]} );
109
110				# (1c) initialise TT diagonal array
111				#my @ar = (1) x scalar ( @{$mat_t->[0]} );
112				my @ar = (1) x $nc_factors;
113				my $TT = Math::MatrixReal->new_diag( [ @ar ] );
114
115				#/ iterations
116				$TT = &_iterate($TT, $p_variables, $nc_factors, $norm_mat );
117
118				#y we repeat step 2a of loop for z generation one final time - i.e. z = x * TT
119				my $z = $norm_mat->multiply($TT);
120
121				#my ($rows,$columns) = $TT->dim();
122				#my ($rows1,$columns1) = $norm_mat->dim();
123				#my ($rows2,$columns2) = $z->dim();
124
125				#y we now reverse the normalisation step:
126				my $z_last = _deep_copy($z->[0]);
127
128				#/ call with 3rd argument to multiply
129				#$z_last = &_multiply_normalise($z_last, $sc_vec);
130				$z_last = &_normalise($z_last, $sc_vec, 1);
131
132				#y use from_rows instead of cols...
133				#my $z_last_mat = Math::MatrixReal->new_from_cols ( $z_last );
134				#$z_last_mat = ~$z_last_mat;
135				my $z_last_mat = Math::MatrixReal->new_from_rows ( $z_last );
136
137				my $rotated_loadings = _transpose($z_last_mat->[0]);
138
139				return $rotated_loadings, $TT->[0];
140				# return $z_last_mat, $TT;
141				}
142
143				sub _data_checks {
144				my $data_a_ref = shift;
145
146				my $rows = scalar ( @{$data_a_ref} );
147				croak qq{\nI need some data - there are too few rows in your data.\n} if ( !$rows \|\| $rows == 1 );
148				my $cols = scalar ( @{$data_a_ref->[0]} );
149				croak qq{\nI need some data - there are too few columns in your data.\n} if ( !$cols \|\| $cols == 1 );
150				for my $row (@{$data_a_ref}) {
151				croak qq{\n\nData set must be passed as ARRAY references.\n} if ( ref $row ne q{ARRAY} );
152				croak qq{\n\nAll rows must have the same number of columns.\n} if ( scalar( @{$row} ) != $cols );
153				}
154				#/ was lazy and cut-n-pasted inappropriate tests - i.e. don´t check for auto-assigning of undef... use matrixreal tests!
155				my $test = Math::MatrixReal->new_from_rows($data_a_ref) ;
156				return;
157				}
158
159				sub _iterate {
160				my ( $TT, $p_variables, $nc_factors, $norm_mat ) = @_;
161
162				# (1d) initialise d
163				my $d = 0;
164
165				# (1e) initialise looping params;
166				my $z;
167				my $param = 1e-05;
168				my $count = 1;
169
170
171				LOOP_LABEL: for (1..3000) {
172
173				# (2a) create z matrix: z <- x * TT
174				$z = $norm_mat->multiply($TT);
175
176				# (2b) create matrix B
177
178				#r (1) - create array of 1´s
179				#my @ar_var = (1) x scalar ( @{$mat_t} );
180				my @ar_var = (1) x $p_variables;
181				# make matrix out of single array/vector
182				my $vector_of_ones_mat = Math::MatrixReal->new_from_rows( [ [@ar_var] ] );
183
184				#print qq{\n\nwe have diagonal matrix\n}, $vector_of_ones;
185
186				#r (2) create matrices of z to powers
187				my $z_3_mat = _raise_to_power($z,3);
188				my $z_2_mat = _raise_to_power($z,2);
189
190				#r (3) multiply vector_of_ones_mat by z_2_mat - vector of ones is 1x9 and z´s are same as loadings e.g. 9x3 - thus we generate 1xfactor-number
191				my $vec1s_z2 = $vector_of_ones_mat->multiply($z_2_mat);
192
193				#r (4) we want to generate a matrix from a vector of diagonals - the vector of diagonals is in the single row
194				my $vec1s_z2_diag = Math::MatrixReal->new_diag( [ @{_deep_copy($vec1s_z2->[0])->[0]} ] );
195
196				#r (5) divide each by factor number - do inplace
197				#$vec1s_z2_diag->multiply_scalar($vec1s_z2_diag,1/9);
198				$vec1s_z2_diag->multiply_scalar($vec1s_z2_diag,1/$p_variables);
199
200				#r (6) multiply z by vec1s_z2_diag
201				my $vec1s_z2_diag_z_prod = $z->multiply($vec1s_z2_diag);
202
203				#r (7) subtract vec1s_z2_diag_z_prod from z^3
204				# must initialise a matrix to use subtract
205				#my $z3_subtracted = new Math::MatrixReal(9,3);
206				my $z3_subtracted = new Math::MatrixReal($p_variables,$nc_factors); # matrix must already exist to use subtract
207				$z3_subtracted->subtract($z_3_mat,$vec1s_z2_diag_z_prod);
208
209				#r (7) t(x) %% (z^3 - z %% diag(drop(rep(1, p) %*% z^2))/p)
210				#y instead of transposing x we transpode z3_subtracted to allow multiplification... then transpose... - probably best to tranpose other to directly get B
211
212				#/ mult syntax is mat1(blah1,n) x mat2(n,blah2) mat1->multiply(mat2) - thus: $z3_subtracted = ~$z3_subtracted; my $B= $z3_subtracted->multiply($norm_mat);
213				#/ resulting in a need to transpose B should be identical to reversing process
214				# $z3_subtracted = ~$z3_subtracted;
215				# #y z3_subtracted is now 3x9 - norm is still 9x3
216				# #$norm_mat = ~$norm_mat;
217				# #y 3x9 * 9x3
218				# my $B = $z3_subtracted->multiply($norm_mat);
219				#
220				# $B = ~$B;
221				#/////////////////////////////////////////////////////////////////////////////////////
222				#y norm is 9x3
223				my $norm_mat_alt = ~$norm_mat;
224				#y norm_alt is 3x9
225				my $B = $norm_mat_alt->multiply($z3_subtracted);
226				#print qq{\n\nwe have B\n}, $B;
227				#/////////////////////////////////////////////////////////////////////////////////////
228
229				# (2c) SVD - uses PDL and SDV GSL module
230				#r sB <- La.svd(B)
231				my $b = $B->[0];
232
233				my $svd = Math::GSL::Linalg::SVD->new;
234				$svd->load_data( {data => $b});
235				$svd->decompose({ algorithm => q{gd} });
236				my ($d_vec, $u, $v) = $svd->results;
237
238				my $u_mat = Math::MatrixReal->new_from_cols($u);
239				my $v_mat = Math::MatrixReal->new_from_cols($v);
240				$u_mat = ~$u_mat;
241
242				# (2d) TT <- sB$u %*% sB$vt
243				$TT = $u_mat->multiply($v_mat);
244
245				# (2e) we save old d
246				my $d_old = $d;
247				#my $d_old = deep_copy($d);
248
249				# (2f) calculate new d - don´t re-declare - over-writing previous value
250				$d = sum @{$d_vec};
251
252				# (2g) possible premature loop exit
253				$count++;
254				#if ( $d < ( $d_old * ( 1 + $param ) ) ) { print qq{\n\nEXITING EARLY AT ITERATION $count\n\n};last LOOP_LABEL; }
255				last LOOP_LABEL if ( $d < ( $d_old * ( 1 + $param ) ) );
256
257				}
258				return $TT;
259				}
260
261				sub _deep_copy {
262				my $ref = shift;
263				if (!ref $ref) { $ref; }
264				elsif (ref $ref eq q{ARRAY} ) { [ map { _deep_copy($_) } @{$ref} ]; }
265				elsif (ref $ref eq q{HASH} ) {
266				+ { map { $_ => _deep_copy($ref->{$_}) } (keys %{$ref}) }; }
267				else { die "what type is $_?" }
268				}
269
270				sub _transpose {
271				my $a_ref = shift;
272				my $done = [];
273				for my $col ( 0..$#{$a_ref->[0]} ) {
274				push @{$done}, [ map { $_->[$col] } @{$a_ref} ];
275				}
276				return $done;
277				}
278
279				sub _calc_sc_vec {
280				my $lol = shift;
281				my $sc_vec = [];
282				$lol = &_transpose ($lol);
283				for my $i ( @{$lol} ) {
284				my $val = sqrt (sum map { $_**2 } @{$i} );
285				push @{$sc_vec}, $val;
286				}
287				return $sc_vec;
288				}
289
290				sub _raise_to_power {
291				my ($z, $power) = @_;
292				# making a new matrix so best to deep_copy rather than fuck up the whole thing - just the data of the matrix
293				my $z_3 = _deep_copy($z->[0]);
294				for my $rows (0..$#{$z_3}) {
295				for my $col (@{$z_3->[$rows]}) {
296				$col = $col**$power }}
297				my $z_3_mat = Math::MatrixReal->new_from_rows( $z_3 );
298				#print qq{\n\nwe have z^3 in R\n}, $z_3_mat;
299				return $z_3_mat;
300				}
301
302
303				#print qq{\ncalling with 3rd arg:\n }, &_normalise(1,2,q{ee});
304				#print qq{\ncalling without 3rd arg:\n }, &_normalise(1,2,);
305
306				#/ call with any 3rd arg to make multiplification
307				sub _normalise {
308				my ( $mat_t, $sc_vec, $mult ) = @_;
309				for my $rows (0..$#{$mat_t}) {
310				for my $col (@{$mat_t->[$rows]}) {
311				#if (@_ > 2) { print qq{long } } else { print qq{not long } };
312				#if (defined $mult) { print qq{defined } } else { print qq{not defined } };
313				if (@_ == 2) {
314				$col = $col / $sc_vec->[$rows];
315				}
316				elsif (@_ ==3) {
317				$col = $col * $sc_vec->[$rows];
318				}
319				}
320				}
321				return $mat_t;
322				}
323
324				#my ($z_last_mat, $TT) = &rotate($var);
325				#print qq{\n\n\nwe are done\n\nloadings:\n}, $z_last_mat, qq{\n\nand rotmat:\n}, $TT;
326
327
328				1; # Magic true value required at end of module
329				__END__