line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::MVA::BayesianDiscrimination; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
49066
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
28
|
|
4
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
5
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
81
|
|
6
|
1
|
|
|
1
|
|
1195
|
use Statistics::MVA; |
|
1
|
|
|
|
|
44870
|
|
|
1
|
|
|
|
|
37
|
|
7
|
1
|
|
|
1
|
|
953
|
use Math::Cephes qw/:explog/; |
|
1
|
|
|
|
|
8168
|
|
|
1
|
|
|
|
|
369
|
|
8
|
1
|
|
|
1
|
|
10
|
use List::Util qw/sum/; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
74
|
|
9
|
1
|
|
|
1
|
|
1009
|
use Text::SimpleTable; |
|
1
|
|
|
|
|
2197
|
|
|
1
|
|
|
|
|
72
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
#=fs pod stuff |
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Statistics::MVA::BayesianDiscrimination - Two-Sample Linear Discrimination Analysis with Posterior Probability Calculation. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=cut |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 VERSION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
This document describes Statistics::MVA::BayesianDiscrimination version 0.0.2 |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=cut |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 DESCRIPTION |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Discriminant analysis is a procedure for classifying a set of observations each with k variables into predefined classes such as to allow the |
27
|
|
|
|
|
|
|
determination of the class of new observations based upon the values of the k variables for these new observations. Group membership based on linear combinations of the variables. From the set of observations where group membership is know the procedure constructs a set of linear functions, termed |
28
|
|
|
|
|
|
|
discriminant functions, such that: |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
L = B[0] + B[1] * x1 + B[2] * x2 +... ... + B[n] * x_n |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Where B[0] is a constant, B[n's] are discriminant coefficients and x's are the input variables. These discriminant functions |
33
|
|
|
|
|
|
|
(there is one for each group - consequently as this module only analyses data for two groups atm it generates two such |
34
|
|
|
|
|
|
|
discriminant functions. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Before proceeding with the analysis you should: (1) Perform Bartlett´s test to see if the covariance matrices of the data |
37
|
|
|
|
|
|
|
are homogenous for the populations used (see L. If they are not homogenous you should use Quadratic Discrimination analysis. |
38
|
|
|
|
|
|
|
(2) test for equality of the group means using Hotelling's T^2 (see L or MANOVA. If |
39
|
|
|
|
|
|
|
the groups do not differ significantly it is extremely unlikely that discrimination analysis with generate any useful |
40
|
|
|
|
|
|
|
discrimination rules. (3) Specify the prior probabilities. This module allows you to do this in several ways - see |
41
|
|
|
|
|
|
|
L. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
This class automatically generates the discrimination coefficients at part of object construction. You can then either |
44
|
|
|
|
|
|
|
use the C |
45
|
|
|
|
|
|
|
observation. Both of these methods are context dependent - see L. See |
46
|
|
|
|
|
|
|
http://en.wikipedia.org/wiki/Linear_discriminant_analysis for further details. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=cut |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 SYNOPSIS |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# we have two groups of data each with 3 variables and 10 observations - example data from http://www.stat.psu.edu/online/courses/stat505/data/insect.txt |
53
|
|
|
|
|
|
|
my $data_X = [ |
54
|
|
|
|
|
|
|
[qw/ 191 131 53/], |
55
|
|
|
|
|
|
|
[qw/ 185 134 50/], |
56
|
|
|
|
|
|
|
[qw/ 200 137 52/], |
57
|
|
|
|
|
|
|
[qw/ 173 127 50/], |
58
|
|
|
|
|
|
|
[qw/ 171 128 49/], |
59
|
|
|
|
|
|
|
[qw/ 160 118 47/], |
60
|
|
|
|
|
|
|
[qw/ 188 134 54/], |
61
|
|
|
|
|
|
|
[qw/ 186 129 51/], |
62
|
|
|
|
|
|
|
[qw/ 174 131 52/], |
63
|
|
|
|
|
|
|
[qw/ 163 115 47/], |
64
|
|
|
|
|
|
|
]; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
my $data_Y = [ |
67
|
|
|
|
|
|
|
[qw/ 186 107 49/], |
68
|
|
|
|
|
|
|
[qw/ 211 122 49/], |
69
|
|
|
|
|
|
|
[qw/ 201 144 47/], |
70
|
|
|
|
|
|
|
[qw/ 242 131 54/], |
71
|
|
|
|
|
|
|
[qw/ 184 108 43/], |
72
|
|
|
|
|
|
|
[qw/ 211 118 51/], |
73
|
|
|
|
|
|
|
[qw/ 217 122 49/], |
74
|
|
|
|
|
|
|
[qw/ 223 127 51/], |
75
|
|
|
|
|
|
|
[qw/ 208 125 50/], |
76
|
|
|
|
|
|
|
[qw/ 199 124 46/], |
77
|
|
|
|
|
|
|
]; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
use Statistics::MVA::BayesianDiscrimination; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Pass the data as a list of the two LISTS-of-LISTS above (termed X and Y). The module by default assumes equal prior probabilities. |
82
|
|
|
|
|
|
|
#my $bld = Statistics::MVA::BayesianDiscrimination->new($data_X,$data_Y); |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Pass the data but telling the module to calculate the prior probabilities as the ratio of observations for the two groups (e.g. P(X) X_obs_num / Total_obs. |
85
|
|
|
|
|
|
|
#my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 1 },$data_X,$data_Y); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Pass the data but directly specifying the values of prior probability for X and Y to use as an anonymous array. |
88
|
|
|
|
|
|
|
#my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => [ 0.25, 0.75 ] },$ins_a,$ins_b); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Print values for coefficients to STDOUT. |
91
|
|
|
|
|
|
|
$bld->output; |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Pass the values as an ARRAY reference by calling in LIST context - see L. |
94
|
|
|
|
|
|
|
my ($prior_x, $constant_x, $matrix_x, $prior_y, $constant_y, $matrix_y) = $bld->output; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Perform discriminantion analyis for a specific observation and print result to STDOUT. |
97
|
|
|
|
|
|
|
$bld->discriminate([qw/184 114 59/]); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Call in LIST context to obtain results directly - see L. |
100
|
|
|
|
|
|
|
my ($val_x, $p_x, $post_p_x, $val_y, $p_y, $post_p_y, $type) = $bld->discriminate([qw/194 124 49/]); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=cut |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 METHODS |
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head2 new |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Creates a new Statistics::MVA::BayesianDiscrimination. This accepts two references for List-of-Lists of values |
110
|
|
|
|
|
|
|
corresponding to the two groups of data - termed X and Y. Within each List-of-Lists each nested array corresponds to a single set of observations. |
111
|
|
|
|
|
|
|
It also accepts an optional HASH reference of options preceding these values. The constructor automatically generates |
112
|
|
|
|
|
|
|
the discrimination coefficients that are accessed using the C |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Pass data as ARRAY references. |
115
|
|
|
|
|
|
|
my $bld = Statistics::MVA::BayesianDiscrimination->new($data_X,$data_Y); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Passing optional HASH reference of options. |
118
|
|
|
|
|
|
|
my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 1 },$data_X,$data_Y); |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head2 output |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Context-dependent method for accessing results of discrimination analysis. In void context it prints the coefficients to STDOUT. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
$bld->output; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
In LIST-context it returns a list of the relevant data accessed as follows: |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
my ($prior_x, $constant_x, $matrix_x, $prior_y, $constant_y, $matrix_y) = $bld->output; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
print qq{\nPrior probability of X = $prior_x and Y = $prior_y.}; |
133
|
|
|
|
|
|
|
print qq{\nConstants for discrimination function for X = $constant_x and Y = $constant_y.}; |
134
|
|
|
|
|
|
|
print qq{\nCoefficients for discrimination function X = @{$matrix_x}.}; |
135
|
|
|
|
|
|
|
print qq{\nCoefficients for discrimination function Y = @{$matrix_y}.}; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=cut |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head2 discriminate |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Method for classification of a new observation. Pass it an ARRAY reference of SCALAR values appropriate for the original |
142
|
|
|
|
|
|
|
data-sets passed to the constructor. In void context it prints a report to STDOUT: |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
$bld->discriminate([qw/123 34 325/]; |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
In LIST-context it returns a list of the relevant data as follows: |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
my ($val_x, $p_x, $post_p_x, $val_y, $p_y, $post_p_y, $type) = $bld->discriminate([qw/123 34 325/]; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
print qq{\nLinear score function for X = $val_x and Y = $val_y - the new observation is of type \x27$type\x27.}; |
151
|
|
|
|
|
|
|
print qq{\nThe prior probability that the new observation is of type X = $p_x and the posterior probability = $post_p_x}; |
152
|
|
|
|
|
|
|
print qq{\nThe prior probability that the new observation is of type X = $p_y and the posterior probability = $post_p_y}; |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=cut |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 OPTIONS |
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 priors |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Pass within an anonymous HASH preceding the two data references during object construction: |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => option_value },$data_X,$data_Y); |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
Passing '0' causes the module to assume equal prior probabilities for the two groups (prior_x = prior_y = 0.5). Passing |
166
|
|
|
|
|
|
|
'1' causes the module to generate priors depending on the ratios of the two data-sets e.g. X has 15 observations and Y |
167
|
|
|
|
|
|
|
has 27 observations gives prior_x = 15 / (15 + 27). Alternatively you may specify the values to use by passing an anonymous ARRAY reference of length 2 |
168
|
|
|
|
|
|
|
where the first value is prior_x and the second is prior_y. There are currently no checks on priors directly passed so |
169
|
|
|
|
|
|
|
ensure that prior_x + prior_y = 1 if you supply you own. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Use prior_x = prior_y = 0.5. |
172
|
|
|
|
|
|
|
my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 0 },$data_X,$data_Y); |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Generate priors depending on rations of observation numbers. |
175
|
|
|
|
|
|
|
my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 1 },$data_X,$data_Y); |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Specify your own priors. |
178
|
|
|
|
|
|
|
my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => [$prior_x, $prior_y] },$data_X,$data_Y); |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=cut |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
#=fe |
183
|
|
|
|
|
|
|
|
184
|
1
|
|
|
1
|
|
6
|
use version; our $VERSION = qv('0.0.2'); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
9
|
|
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
#/ with these new modules object creation directly checks data and performs analysis. storing just the essential results. can access directly or print them |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub new { |
189
|
0
|
|
|
0
|
1
|
|
my $class = shift; |
190
|
|
|
|
|
|
|
#y grab an options hash if there is one |
191
|
0
|
0
|
|
|
|
|
my $options_ref = shift if ( ref $_[0] eq q{HASH} ); |
192
|
|
|
|
|
|
|
#y the rest is data |
193
|
0
|
|
|
|
|
|
my @data = @_; |
194
|
0
|
|
|
|
|
|
my $p_x; |
195
|
|
|
|
|
|
|
my $p_y; |
196
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
|
my $self = []; |
198
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
my $k = scalar @data; |
200
|
0
|
0
|
|
|
|
|
croak qq{\nThis only accepts two groups of data} if $k != 2; |
201
|
|
|
|
|
|
|
#y if there´s an options hash with priors check it - otherwise default... |
202
|
0
|
0
|
|
|
|
|
if ( defined $options_ref ) { ($p_x, $p_y) = &_priors($options_ref, @data) } |
|
0
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
else { |
204
|
0
|
|
|
|
|
|
print qq{\nDefaulting to priors of 0.5 for both x and y.}; |
205
|
0
|
|
|
|
|
|
$p_x = 0.5; |
206
|
0
|
|
|
|
|
|
$p_y = 0.5; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
#y check they have good matrix format - need to check compatibility too |
209
|
0
|
0
|
|
|
|
|
croak qq{\nbad data} if &Statistics::MVA::CVMat::_check($data[0]); |
210
|
0
|
0
|
|
|
|
|
croak qq{\nbad data} if &Statistics::MVA::CVMat::_check($data[1]); |
211
|
|
|
|
|
|
|
#y data is passed in table format - i.e. nested arrays (rows) correspond to observations and not variables |
212
|
0
|
|
|
|
|
|
my $lol1 = &Statistics::MVA::CVMat::transpose($data[0]); |
213
|
0
|
|
|
|
|
|
my $lol2 = &Statistics::MVA::CVMat::transpose($data[1]); |
214
|
|
|
|
|
|
|
#y get variable number |
215
|
0
|
|
|
|
|
|
my $p = scalar @{$lol1}; |
|
0
|
|
|
|
|
|
|
216
|
0
|
0
|
|
|
|
|
croak qq{\nThese data sets are not compatible} if ($p != scalar @{$lol2}); |
|
0
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
#y combine the data for overall means calculations - if using single data with names could use adjust in MVA |
218
|
0
|
|
|
|
|
|
my $full = []; |
219
|
0
|
|
|
|
|
|
$full = &_combine($lol1,$lol2, $p); |
220
|
|
|
|
|
|
|
#y copy vars and we will do in place mean calculation - no need to deep copy references here |
221
|
|
|
|
|
|
|
#my $first = &_deep_copy($lol1); |
222
|
0
|
|
|
|
|
|
my $first = [@{$lol1}]; |
|
0
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
my $second = [@{$lol2}]; |
|
0
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
#y in place calculation of the means |
225
|
0
|
|
|
|
|
|
&_means($full,$p); |
226
|
0
|
|
|
|
|
|
&_means($first,$p); |
227
|
0
|
|
|
|
|
|
&_means($second,$p); |
228
|
|
|
|
|
|
|
#y in place subtraction of means from raw data |
229
|
0
|
|
|
|
|
|
&_subtract($lol1, $full); |
230
|
0
|
|
|
|
|
|
&_subtract($lol2, $full); |
231
|
|
|
|
|
|
|
#y for MVA cv_calculations the data needs to be in table format - this is wastefull as it just transpose - thus is unecessary transposes - IN PLACE - should modify this?!? |
232
|
0
|
|
|
|
|
|
$lol1 = &Statistics::MVA::CVMat::transpose($lol1); |
233
|
0
|
|
|
|
|
|
$lol2 = &Statistics::MVA::CVMat::transpose($lol2); |
234
|
0
|
|
|
|
|
|
my $mva; |
235
|
|
|
|
|
|
|
#y if we have a twat who likes this annoying method |
236
|
0
|
0
|
0
|
|
|
|
if (defined $options_ref && defined $options_ref->{cv} && $options_ref->{cv} == 0) { $mva = &_twats($lol1, $lol2); } |
|
0
|
|
0
|
|
|
|
|
237
|
|
|
|
|
|
|
else { |
238
|
|
|
|
|
|
|
#y create MVA object - these are defaults parameters so don´t need to pass them... |
239
|
0
|
|
|
|
|
|
$mva = Statistics::MVA->new([ $lol1, $lol2 ], {standardise => 0, divisor => 1}); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
#r CV matrix data - loose last [0] for realmatrix object |
242
|
|
|
|
|
|
|
#print Dumper $mva->[0][0][0][0]; |
243
|
|
|
|
|
|
|
#print Dumper $mva->[0][1][0][0]; |
244
|
|
|
|
|
|
|
#y create empty matrix of same dimensions |
245
|
0
|
|
|
|
|
|
my $c = $mva->[0][0][0]->shadow; |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
#/ its 2x not divided by priors?!? - only constant takes into account priors |
248
|
|
|
|
|
|
|
#$mva->[0][0][0]->multiply_scalar($mva->[0][0][0],$p_x); |
249
|
|
|
|
|
|
|
#$mva->[0][1][0]->multiply_scalar($mva->[0][1][0],$p_y); |
250
|
0
|
|
|
|
|
|
$mva->[0][0][0]->multiply_scalar($mva->[0][0][0],0.5); |
251
|
0
|
|
|
|
|
|
$mva->[0][1][0]->multiply_scalar($mva->[0][1][0],0.5); |
252
|
|
|
|
|
|
|
|
253
|
0
|
|
|
|
|
|
$c->add($mva->[0][0][0],$mva->[0][1][0]); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
#y no need for transpose just build from rows... |
256
|
0
|
|
|
|
|
|
my $averages_a = Math::MatrixReal->new_from_rows([$first]); |
257
|
0
|
|
|
|
|
|
my $averages_b = Math::MatrixReal->new_from_rows([$second]); |
258
|
|
|
|
|
|
|
#y calculate b_0 - constant discrimination coefficients |
259
|
0
|
|
|
|
|
|
my $constant_x = 0.5 * $averages_a * $c->inverse * ~$averages_a; |
260
|
0
|
|
|
|
|
|
my $constant_y = 0.5 * $averages_b * $c->inverse * ~$averages_b; |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
#/ you must add the priors to the constant - only the constant takes into account the priors. |
263
|
|
|
|
|
|
|
#$constant_x->[0][0][0] = $constant_x->[0][0][0] - log($p_x); |
264
|
|
|
|
|
|
|
#$constant_y->[0][0][0] = $constant_y->[0][0][0] - log($p_y); |
265
|
|
|
|
|
|
|
#y now should be -0.5... |
266
|
0
|
|
|
|
|
|
$constant_x->[0][0][0] = -$constant_x->[0][0][0] + log($p_x); |
267
|
0
|
|
|
|
|
|
$constant_y->[0][0][0] = -$constant_y->[0][0][0] + log($p_y); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
#y calculate the rest of the discrimination coeficients |
270
|
|
|
|
|
|
|
#~$averages_a * $c->inverse * $averages_a; |
271
|
0
|
|
|
|
|
|
my $matrix_x = $averages_a * $c->inverse; |
272
|
0
|
|
|
|
|
|
my $matrix_y = $averages_b * $c->inverse; |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
#y feed the objectvv- this is only done for persistance - otherwise we loose the data... |
275
|
0
|
|
|
|
|
|
$self->[0][0][0] = $constant_x; |
276
|
0
|
|
|
|
|
|
$self->[0][0][1] = $matrix_x; |
277
|
0
|
|
|
|
|
|
$self->[0][1][0] = $constant_y; |
278
|
0
|
|
|
|
|
|
$self->[0][1][1] = $matrix_y; |
279
|
0
|
|
|
|
|
|
$self->[1][0] = $p_x; |
280
|
0
|
|
|
|
|
|
$self->[1][1] = $p_y; |
281
|
0
|
|
|
|
|
|
$self->[2] = $p; |
282
|
|
|
|
|
|
|
|
283
|
0
|
|
|
|
|
|
bless $self, $class; |
284
|
0
|
|
|
|
|
|
return $self; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
sub _priors { |
288
|
0
|
|
|
0
|
|
|
my ($options_ref, @data) = @_; |
289
|
0
|
|
|
|
|
|
my $p_x; |
290
|
|
|
|
|
|
|
my $p_y; |
291
|
0
|
0
|
|
|
|
|
if ($options_ref->{priors} == 1 ) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
292
|
0
|
|
|
|
|
|
my $n_x = scalar @{$data[0]}; |
|
0
|
|
|
|
|
|
|
293
|
0
|
|
|
|
|
|
my $n_y = scalar @{$data[1]}; |
|
0
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
my $n_total = $n_x + $n_y; |
295
|
0
|
|
|
|
|
|
$p_x = sprintf (q{%.5f}, $n_x / $n_total); |
296
|
0
|
|
|
|
|
|
$p_y = sprintf (q{%.5f}, $n_y / $n_total); |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
elsif (ref $options_ref->{priors} eq q{ARRAY} ) { |
299
|
0
|
|
|
|
|
|
my @priors = @{$options_ref->{priors}}; |
|
0
|
|
|
|
|
|
|
300
|
0
|
0
|
|
|
|
|
croak qq{\nIf passing an ARRAY ref of priors it must have length two.} if ( scalar @priors != 2 ); |
301
|
0
|
0
|
0
|
|
|
|
croak qq{\nThe priors must sum to 1.} if ( $priors[0] + $priors[1] < 0.95 || $priors[0] + $priors[1] > 1.05 ); |
302
|
0
|
|
|
|
|
|
$p_x = $priors[0]; |
303
|
0
|
|
|
|
|
|
$p_y = $priors[1]; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
elsif ($options_ref->{priors} == 0) { |
306
|
0
|
|
|
|
|
|
$p_x = 0.5; |
307
|
0
|
|
|
|
|
|
$p_y = 0.5; |
308
|
|
|
|
|
|
|
} |
309
|
0
|
|
|
|
|
|
else { croak qq{\nI do not recognise that value for the priors option} } |
310
|
0
|
|
|
|
|
|
return ($p_x, $p_y); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
sub _subtract { |
314
|
0
|
|
|
0
|
|
|
my ($lol, $full) = @_; |
315
|
0
|
|
|
|
|
|
for my $var (0..$#{$lol}) { |
|
0
|
|
|
|
|
|
|
316
|
0
|
|
|
|
|
|
for my $cell (0..$#{$lol->[$var]}) { |
|
0
|
|
|
|
|
|
|
317
|
0
|
|
|
|
|
|
$lol->[$var][$cell] = $lol->[$var][$cell] - $full->[$var]; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub _combine{ |
323
|
0
|
|
|
0
|
|
|
my ($lol1, $lol2, $p) = @_; |
324
|
0
|
|
|
|
|
|
my $full = []; |
325
|
0
|
|
|
|
|
|
for my $i (0..$p-1) { |
326
|
0
|
|
|
|
|
|
push @{$full->[$i]}, @{$lol1->[$i]}, @{$lol2->[$i]}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
} |
328
|
0
|
|
|
|
|
|
return $full; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
sub _means { |
332
|
0
|
|
|
0
|
|
|
my ($full, $p) = @_; |
333
|
0
|
|
|
|
|
|
for my $i (0..$p-1) { |
334
|
0
|
|
|
|
|
|
my $sum = sum @{$full->[$i]}; |
|
0
|
|
|
|
|
|
|
335
|
0
|
|
|
|
|
|
my $n = scalar @{$full->[$i]}; |
|
0
|
|
|
|
|
|
|
336
|
0
|
|
|
|
|
|
$full->[$i] = $sum/$n; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
sub _twats { |
341
|
0
|
|
|
0
|
|
|
my ($lol1, $lol2) = @_; |
342
|
0
|
|
|
|
|
|
my $lol1_mat = Math::MatrixReal->new_from_rows($lol1); |
343
|
0
|
|
|
|
|
|
my $n_1 = scalar @{$lol1}; |
|
0
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
|
$lol1_mat = (~$lol1_mat * $lol1_mat) / $n_1; |
345
|
0
|
|
|
|
|
|
my $lol2_mat = Math::MatrixReal->new_from_rows($lol2); |
346
|
0
|
|
|
|
|
|
my $n_2 = scalar @{$lol2}; |
|
0
|
|
|
|
|
|
|
347
|
0
|
|
|
|
|
|
$lol2_mat = (~$lol2_mat * $lol2_mat) / $n_2; |
348
|
0
|
|
|
|
|
|
my $mva = []; |
349
|
0
|
|
|
|
|
|
$mva->[0][0][0] = $lol1_mat; |
350
|
0
|
|
|
|
|
|
$mva->[0][1][0] = $lol2_mat; |
351
|
0
|
|
|
|
|
|
print $mva->[0][0][0]; |
352
|
0
|
|
|
|
|
|
print $mva->[0][1][0]; |
353
|
0
|
|
|
|
|
|
return $mva; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub output { |
357
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
358
|
0
|
|
|
|
|
|
my $constant_x = $self->[0][0][0]; |
359
|
0
|
|
|
|
|
|
my $constant_y = $self->[0][1][0]; |
360
|
0
|
|
|
|
|
|
my $matrix_x = $self->[0][0][1]; |
361
|
0
|
|
|
|
|
|
my $matrix_y = $self->[0][1][1]; |
362
|
0
|
|
|
|
|
|
my $p_x = $self->[1][0]; |
363
|
0
|
|
|
|
|
|
my $p_y = $self->[1][1]; |
364
|
0
|
|
|
|
|
|
my $p_var_num = $self->[2]; |
365
|
0
|
0
|
|
|
|
|
if (!wantarray) { |
366
|
0
|
|
|
|
|
|
print qq{\nTwo-groups with $p_var_num variables each and prior probabilities of p(x) = $p_x and p(y) = $p_y.\n\n}; |
367
|
0
|
|
|
|
|
|
my @config = ( [17, q{}], [15, q{X}], [15, q{Y}] ); |
368
|
0
|
|
|
|
|
|
my $tbl = Text::SimpleTable->new(@config); |
369
|
0
|
|
|
|
|
|
$tbl->row( qq{Constant B[0]}, sprintf(q{%.5f}, $constant_x->[0][0][0]), sprintf(q{%.5f}, $constant_y->[0][0][0]) ); |
370
|
0
|
|
|
|
|
|
$tbl->hr; |
371
|
0
|
|
|
|
|
|
for my $row (0..$#{$matrix_x->[0][0]}) { |
|
0
|
|
|
|
|
|
|
372
|
0
|
|
|
|
|
|
my $coeff = $row+1; |
373
|
0
|
|
|
|
|
|
$tbl->row( qq{Coefficient B[$coeff]}, sprintf(q{%.5f}, $matrix_x->[0][0][$row]), sprintf(q{%.5f}, $matrix_y->[0][0][$row]) ); |
374
|
0
|
0
|
|
|
|
|
$tbl->hr if $row != $#{$matrix_x->[0][0]}; |
|
0
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
} |
376
|
0
|
|
|
|
|
|
print $tbl->draw; |
377
|
0
|
|
|
|
|
|
return; |
378
|
|
|
|
|
|
|
} |
379
|
0
|
|
|
|
|
|
else { return ( $p_x, $constant_x->[0][0][0], $matrix_x->[0][0], $p_y, $constant_y->[0][0][0], $matrix_y->[0][0] ); } |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
sub discriminate { |
383
|
0
|
|
|
0
|
1
|
|
my ($self, $ex) = @_; |
384
|
0
|
|
|
|
|
|
my $context = wantarray; |
385
|
0
|
|
|
|
|
|
my $constant_x = $self->[0][0][0]; |
386
|
0
|
|
|
|
|
|
my $constant_y = $self->[0][1][0]; |
387
|
0
|
|
|
|
|
|
my $matrix_x = $self->[0][0][1]; |
388
|
0
|
|
|
|
|
|
my $matrix_y = $self->[0][1][1]; |
389
|
0
|
|
|
|
|
|
my $p_x = $self->[1][0]; |
390
|
0
|
|
|
|
|
|
my $p_y = $self->[1][1]; |
391
|
0
|
|
|
|
|
|
my $p_var_num = $self->[2]; |
392
|
|
|
|
|
|
|
|
393
|
0
|
0
|
|
|
|
|
croak qq{\nData must be passed as an ARRAY ref} if (ref $ex ne q{ARRAY}); |
394
|
0
|
0
|
|
|
|
|
croak qq{\nThis example has the wrong variable number} if scalar @{$ex} != $p_var_num; |
|
0
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
|
396
|
0
|
|
|
|
|
|
my $example = Math::MatrixReal->new_from_cols([$ex]); |
397
|
0
|
|
|
|
|
|
my $val_x = ( $matrix_x * $example ) + $constant_x;#; + log($p_x); |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
#r when adding log(p) its the linear score function and not simply the linear discrimination result |
400
|
|
|
|
|
|
|
#$val_x = $val_x->[0][0][0];#+log($p_x); |
401
|
0
|
|
|
|
|
|
$val_x = $val_x->[0][0][0]; |
402
|
0
|
|
|
|
|
|
my $val_y = ( $matrix_y * $example ) + $constant_y;#; + log($p_x); |
403
|
|
|
|
|
|
|
#$val_y = $val_y->[0][0][0];#+log($p_y); |
404
|
0
|
|
|
|
|
|
$val_y = $val_y->[0][0][0]; |
405
|
|
|
|
|
|
|
|
406
|
0
|
0
|
|
|
|
|
my $type = $val_x > $val_y ? q{X} |
|
|
0
|
|
|
|
|
|
407
|
|
|
|
|
|
|
: $val_y > $val_x ? q{Y} |
408
|
|
|
|
|
|
|
: undef; |
409
|
|
|
|
|
|
|
|
410
|
0
|
|
|
|
|
|
my $post_p_x = exp($val_x) / (exp($val_x) + exp($val_y)); |
411
|
0
|
|
|
|
|
|
my $post_p_y = exp($val_y) / (exp($val_x) + exp($val_y)); |
412
|
|
|
|
|
|
|
|
413
|
0
|
0
|
|
|
|
|
if (!$context) { |
414
|
0
|
|
|
|
|
|
print qq{\nLinear score function for x = $val_x\nLinear score function for y = $val_y\n}; |
415
|
0
|
0
|
|
|
|
|
if ( defined $type ) { print qq{\nExample (@{$ex}) is a \x27$type\x27.} } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
416
|
0
|
|
|
|
|
|
else { print qq{\nCannot distinguish which group the example belongs to.} } |
417
|
0
|
|
|
|
|
|
print qq{\n\nPrior probability of being an x = $p_x\nPosterior probability of being an x = $post_p_x\n}; |
418
|
0
|
|
|
|
|
|
print qq{\nPrior probability of being a y = $p_y\nPosterior probability of being a y = $post_p_y\n}; |
419
|
0
|
|
|
|
|
|
return; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
else { |
422
|
0
|
|
|
|
|
|
return ($val_x, $p_x, $post_p_x, $val_y, $p_y, $post_p_y, $type); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
__END__ |