File Coverage

blib/lib/Statistics/PCA/Varimax.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Statistics::PCA::Varimax;
2              
3 1     1   24695 use warnings;
  1         2  
  1         37  
4 1     1   5 use strict;
  1         2  
  1         34  
5 1     1   6 use Carp;
  1         6  
  1         103  
6 1     1   437 use Math::GSL::Linalg::SVD;
  0            
  0            
7             use Math::MatrixReal;
8             use List::Util qw(sum);
9              
10             =head1 NAME
11              
12             Statistics::PCA::Varimax - A Perl implementation of Varimax rotation.
13              
14             =cut
15              
16             =head1 VERSION
17              
18             This document describes Statistics::PCA::Varimax version 0.0.2
19              
20              
21             =cut
22              
23             =head1 SYNOPSIS
24              
25             use Statistics::PCA::Varimax;
26            
27             # Each nested array ref corresponds to the loadings for a single factor.
28             my $loadings = [
29             [qw/ 0.28681878905 0.69807334810 0.74438876316 0.47052419229 0.68079195447 0.49817011866 0.86049803480 0.64178962603 0.29784558460 /],
30             [qw/ 0.07560334830 0.15335493657 -0.40959477002 0.52231277744 -0.15586396086 -0.49832262559 -0.11502014276 0.32160898539 0.59537280152 /],
31             [qw/ -0.84084848877 -0.08371208961 0.02047721303 -0.13507580587 0.14832508991 0.25345619152 -0.01159349490 -0.04396749541 0.53340721684 /],
32             ];
33              
34             # Calculate the rotated loadings and orthogonal matrix.
35             my ($rotated_loadings_ref, $orthogonal_matrix_ref) = &rotate($loadings);
36              
37             print qq{\nRotated Loadings:\n};
38             for my $c (0..$#{$rotated_loadings_ref->[0]}) { for my $r (0..$#{$rotated_loadings_ref}) {
39             #print qq{$rotated_loadings_ref->[$r][$c], and r: $r and c: $c\t} }; print qq{\n};
40             print qq{$rotated_loadings_ref->[$r][$c]\t} }; print qq{\n};
41             }
42              
43             print qq{\nOrthogonal Matrix:\n};
44             for my $r (0..$#{$orthogonal_matrix_ref}) { for my $c (0..$#{$orthogonal_matrix_ref->[$r]}) {
45             print qq{$orthogonal_matrix_ref->[$r][$c]\t} }; print qq{\n};
46             }
47              
48             =cut
49              
50             =head1 DESCRIPTION
51              
52             Varimax rotation is a change of coordinates used in principal component analysis and factor analysis that maximizes the
53             sum of the variances of the squared loadings matrix. This module exports a single routine 'rotate'. This routine is
54             called in LIST context and accepts a LIST-of-LISTS (LoL) corresponding to the loadings matrix of a factor analysis and
55             returns two references to LoLs (NOTE: each nested LIST corresponds to the loadings for a single factor). The first is a
56             LoL of the rotated loadings and the seconds is a LoL of the orthogonal matrix. See http://en.wikipedia.org/wiki/Varimax_rotation.
57              
58             =cut
59              
60             =head1 DEPENDENCIES
61              
62             'Math::GSL::Linalg::SVD' => '0.0.2',
63             'Math::MatrixReal' => '2.05',
64             'List::Util' => '1.22',
65              
66             =cut
67              
68             =head1 AUTHOR
69              
70             Daniel S. T. Hughes C<< >>
71              
72             =cut
73              
74             use version; our $VERSION = qv('0.0.2');
75              
76             require Exporter;
77             our @ISA = qw(Exporter);
78             our @EXPORT = qw(rotate); # symbols to export by default
79              
80             sub rotate {
81             my $var = shift;
82              
83             croak qq{\nCall me in LIST context} if !wantarray;
84              
85             &_data_checks($var);
86              
87             # (1a) normalise
88              
89             #y calculate - the sqrt of the sum of the squares of each row
90             my $sc_vec = &_calc_sc_vec($var);
91              
92             #y divide each entry in each row by the normalising values - not matrix multiplication - i.e. in this case it is 9x3 devided by 1x9 - i.e. could only give 1x3 or 3x1
93              
94             my $mat_t = &_transpose($var);
95              
96             #/ call with 3rd arg for multiplification
97             #$mat_t = _devide_normalise($mat_t,$sc_vec);
98             $mat_t = &_normalise($mat_t,$sc_vec);
99              
100             my $normalised_data = &_transpose($mat_t);
101              
102             #y transpose before or after?!?
103             my $norm_mat = Math::MatrixReal->new_from_cols ( $normalised_data );
104              
105             # (1b) initialise others - p and nc - i.e. variable and factor number
106              
107             my $p_variables = scalar ( @{$mat_t} );
108             my $nc_factors = scalar ( @{$mat_t->[0]} );
109              
110             # (1c) initialise TT diagonal array
111             #my @ar = (1) x scalar ( @{$mat_t->[0]} );
112             my @ar = (1) x $nc_factors;
113             my $TT = Math::MatrixReal->new_diag( [ @ar ] );
114              
115             #/ iterations
116             $TT = &_iterate($TT, $p_variables, $nc_factors, $norm_mat );
117              
118             #y we repeat step 2a of loop for z generation one final time - i.e. z = x * TT
119             my $z = $norm_mat->multiply($TT);
120              
121             #my ($rows,$columns) = $TT->dim();
122             #my ($rows1,$columns1) = $norm_mat->dim();
123             #my ($rows2,$columns2) = $z->dim();
124              
125             #y we now reverse the normalisation step:
126             my $z_last = _deep_copy($z->[0]);
127            
128             #/ call with 3rd argument to multiply
129             #$z_last = &_multiply_normalise($z_last, $sc_vec);
130             $z_last = &_normalise($z_last, $sc_vec, 1);
131              
132             #y use from_rows instead of cols...
133             #my $z_last_mat = Math::MatrixReal->new_from_cols ( $z_last );
134             #$z_last_mat = ~$z_last_mat;
135             my $z_last_mat = Math::MatrixReal->new_from_rows ( $z_last );
136              
137             my $rotated_loadings = _transpose($z_last_mat->[0]);
138              
139             return $rotated_loadings, $TT->[0];
140             # return $z_last_mat, $TT;
141             }
142              
143             sub _data_checks {
144             my $data_a_ref = shift;
145              
146             my $rows = scalar ( @{$data_a_ref} );
147             croak qq{\nI need some data - there are too few rows in your data.\n} if ( !$rows || $rows == 1 );
148             my $cols = scalar ( @{$data_a_ref->[0]} );
149             croak qq{\nI need some data - there are too few columns in your data.\n} if ( !$cols || $cols == 1 );
150             for my $row (@{$data_a_ref}) {
151             croak qq{\n\nData set must be passed as ARRAY references.\n} if ( ref $row ne q{ARRAY} );
152             croak qq{\n\nAll rows must have the same number of columns.\n} if ( scalar( @{$row} ) != $cols );
153             }
154             #/ was lazy and cut-n-pasted inappropriate tests - i.e. don´t check for auto-assigning of undef... use matrixreal tests!
155             my $test = Math::MatrixReal->new_from_rows($data_a_ref) ;
156             return;
157             }
158              
159             sub _iterate {
160             my ( $TT, $p_variables, $nc_factors, $norm_mat ) = @_;
161            
162             # (1d) initialise d
163             my $d = 0;
164              
165             # (1e) initialise looping params;
166             my $z;
167             my $param = 1e-05;
168             my $count = 1;
169              
170            
171             LOOP_LABEL: for (1..3000) {
172              
173             # (2a) create z matrix: z <- x * TT
174             $z = $norm_mat->multiply($TT);
175              
176             # (2b) create matrix B
177              
178             #r (1) - create array of 1´s
179             #my @ar_var = (1) x scalar ( @{$mat_t} );
180             my @ar_var = (1) x $p_variables;
181             # make matrix out of single array/vector
182             my $vector_of_ones_mat = Math::MatrixReal->new_from_rows( [ [@ar_var] ] );
183              
184             #print qq{\n\nwe have diagonal matrix\n}, $vector_of_ones;
185              
186             #r (2) create matrices of z to powers
187             my $z_3_mat = _raise_to_power($z,3);
188             my $z_2_mat = _raise_to_power($z,2);
189              
190             #r (3) multiply vector_of_ones_mat by z_2_mat - vector of ones is 1x9 and z´s are same as loadings e.g. 9x3 - thus we generate 1xfactor-number
191             my $vec1s_z2 = $vector_of_ones_mat->multiply($z_2_mat);
192              
193             #r (4) we want to generate a matrix from a vector of diagonals - the vector of diagonals is in the single row
194             my $vec1s_z2_diag = Math::MatrixReal->new_diag( [ @{_deep_copy($vec1s_z2->[0])->[0]} ] );
195              
196             #r (5) divide each by factor number - do inplace
197             #$vec1s_z2_diag->multiply_scalar($vec1s_z2_diag,1/9);
198             $vec1s_z2_diag->multiply_scalar($vec1s_z2_diag,1/$p_variables);
199              
200             #r (6) multiply z by vec1s_z2_diag
201             my $vec1s_z2_diag_z_prod = $z->multiply($vec1s_z2_diag);
202              
203             #r (7) subtract vec1s_z2_diag_z_prod from z^3
204             # must initialise a matrix to use subtract
205             #my $z3_subtracted = new Math::MatrixReal(9,3);
206             my $z3_subtracted = new Math::MatrixReal($p_variables,$nc_factors); # matrix must already exist to use subtract
207             $z3_subtracted->subtract($z_3_mat,$vec1s_z2_diag_z_prod);
208              
209             #r (7) t(x) %*% (z^3 - z %*% diag(drop(rep(1, p) %*% z^2))/p)
210             #y instead of transposing x we transpode z3_subtracted to allow multiplification... then transpose... - probably best to tranpose other to directly get B
211              
212             #/ mult syntax is mat1(blah1,n) x mat2(n,blah2) mat1->multiply(mat2) - thus: $z3_subtracted = ~$z3_subtracted; my $B= $z3_subtracted->multiply($norm_mat);
213             #/ resulting in a need to transpose B should be identical to reversing process
214             # $z3_subtracted = ~$z3_subtracted;
215             # #y z3_subtracted is now 3x9 - norm is still 9x3
216             # #$norm_mat = ~$norm_mat;
217             # #y 3x9 * 9x3
218             # my $B = $z3_subtracted->multiply($norm_mat);
219             #
220             # $B = ~$B;
221             #/////////////////////////////////////////////////////////////////////////////////////
222             #y norm is 9x3
223             my $norm_mat_alt = ~$norm_mat;
224             #y norm_alt is 3x9
225             my $B = $norm_mat_alt->multiply($z3_subtracted);
226             #print qq{\n\nwe have B\n}, $B;
227             #/////////////////////////////////////////////////////////////////////////////////////
228              
229             # (2c) SVD - uses PDL and SDV GSL module
230             #r sB <- La.svd(B)
231             my $b = $B->[0];
232              
233             my $svd = Math::GSL::Linalg::SVD->new;
234             $svd->load_data( {data => $b});
235             $svd->decompose({ algorithm => q{gd} });
236             my ($d_vec, $u, $v) = $svd->results;
237              
238             my $u_mat = Math::MatrixReal->new_from_cols($u);
239             my $v_mat = Math::MatrixReal->new_from_cols($v);
240             $u_mat = ~$u_mat;
241              
242             # (2d) TT <- sB$u %*% sB$vt
243             $TT = $u_mat->multiply($v_mat);
244              
245             # (2e) we save old d
246             my $d_old = $d;
247             #my $d_old = deep_copy($d);
248              
249             # (2f) calculate new d - don´t re-declare - over-writing previous value
250             $d = sum @{$d_vec};
251              
252             # (2g) possible premature loop exit
253             $count++;
254             #if ( $d < ( $d_old * ( 1 + $param ) ) ) { print qq{\n\nEXITING EARLY AT ITERATION $count\n\n};last LOOP_LABEL; }
255             last LOOP_LABEL if ( $d < ( $d_old * ( 1 + $param ) ) );
256              
257             }
258             return $TT;
259             }
260              
261             sub _deep_copy {
262             my $ref = shift;
263             if (!ref $ref) { $ref; }
264             elsif (ref $ref eq q{ARRAY} ) { [ map { _deep_copy($_) } @{$ref} ]; }
265             elsif (ref $ref eq q{HASH} ) {
266             + { map { $_ => _deep_copy($ref->{$_}) } (keys %{$ref}) }; }
267             else { die "what type is $_?" }
268             }
269              
270             sub _transpose {
271             my $a_ref = shift;
272             my $done = [];
273             for my $col ( 0..$#{$a_ref->[0]} ) {
274             push @{$done}, [ map { $_->[$col] } @{$a_ref} ];
275             }
276             return $done;
277             }
278              
279             sub _calc_sc_vec {
280             my $lol = shift;
281             my $sc_vec = [];
282             $lol = &_transpose ($lol);
283             for my $i ( @{$lol} ) {
284             my $val = sqrt (sum map { $_**2 } @{$i} );
285             push @{$sc_vec}, $val;
286             }
287             return $sc_vec;
288             }
289              
290             sub _raise_to_power {
291             my ($z, $power) = @_;
292             # making a new matrix so best to deep_copy rather than fuck up the whole thing - just the data of the matrix
293             my $z_3 = _deep_copy($z->[0]);
294             for my $rows (0..$#{$z_3}) {
295             for my $col (@{$z_3->[$rows]}) {
296             $col = $col**$power }}
297             my $z_3_mat = Math::MatrixReal->new_from_rows( $z_3 );
298             #print qq{\n\nwe have z^3 in R\n}, $z_3_mat;
299             return $z_3_mat;
300             }
301              
302              
303             #print qq{\ncalling with 3rd arg:\n }, &_normalise(1,2,q{ee});
304             #print qq{\ncalling without 3rd arg:\n }, &_normalise(1,2,);
305              
306             #/ call with any 3rd arg to make multiplification
307             sub _normalise {
308             my ( $mat_t, $sc_vec, $mult ) = @_;
309             for my $rows (0..$#{$mat_t}) {
310             for my $col (@{$mat_t->[$rows]}) {
311             #if (@_ > 2) { print qq{long } } else { print qq{not long } };
312             #if (defined $mult) { print qq{defined } } else { print qq{not defined } };
313             if (@_ == 2) {
314             $col = $col / $sc_vec->[$rows];
315             }
316             elsif (@_ ==3) {
317             $col = $col * $sc_vec->[$rows];
318             }
319             }
320             }
321             return $mat_t;
322             }
323              
324             #my ($z_last_mat, $TT) = &rotate($var);
325             #print qq{\n\n\nwe are done\n\nloadings:\n}, $z_last_mat, qq{\n\nand rotmat:\n}, $TT;
326              
327              
328             1; # Magic true value required at end of module
329             __END__