File Coverage

blib/lib/Statistics/LineFit.pm
Criterion Covered Total %
statement 211 215 98.1
branch 119 138 86.2
condition 7 12 58.3
subroutine 22 22 100.0
pod 12 17 70.5
total 371 404 91.8


line stmt bran cond sub pod time code
1             package Statistics::LineFit;
2 14     14   367833 use strict;
  14         36  
  14         936  
3 14     14   73 use Carp qw(carp);
  14         22  
  14         1077  
4             BEGIN {
5 14     14   84 use Exporter ();
  14         40  
  14         353  
6 14     14   81 use vars qw ($AUTHOR $VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  14         21  
  14         2101  
7 14     14   29 $AUTHOR = 'Richard Anderson ';
8 14         44 @EXPORT = @EXPORT_OK = qw();
9 14         42 %EXPORT_TAGS = ();
10 14         212 @ISA = qw(Exporter);
11 14         38478 $VERSION = 0.06;
12             }
13              
14             sub new {
15             #
16             # Purpose: Create a new Statistics::LineFit object
17             #
18 15     15 1 151419 my ($caller, $validate, $hush) = @_;
19 15 100       168 my $self = { doneRegress => 0,
    100          
20             gotData => 0,
21             hush => defined $hush ? $hush : 0,
22             validate => defined $validate ? $validate : 0,
23             };
24 15   33     135 bless $self, ref($caller) || $caller;
25 15         81 return $self;
26             }
27              
28             sub coefficients {
29             #
30             # Purpose: Return the slope and intercept from least squares line fit
31             #
32 17     17 1 605 my $self = shift;
33 17 100 66     125 unless (defined $self->{intercept} and defined $self->{slope}) {
34 12 100       51 $self->regress() or return;
35             }
36 15         748 return ($self->{intercept}, $self->{slope});
37             }
38              
39             sub computeSums {
40             #
41             # Purpose: Compute sum of x, y, x**2, y**2 and x*y (private method)
42             #
43 14     14 0 524 my $self = shift;
44 14         39 my ($sumX, $sumY, $sumXX, $sumYY, $sumXY) = (0, 0, 0, 0, 0);
45 14 100       58 if (defined $self->{weight}) {
46 5         20 for (my $i = 0; $i < $self->{numXY}; ++$i) {
47 213         1076 $sumX += $self->{weight}[$i] * $self->{x}[$i];
48 213         362 $sumY += $self->{weight}[$i] * $self->{y}[$i];
49 213         366 $sumXX += $self->{weight}[$i] * $self->{x}[$i] ** 2;
50 213         399 $sumYY += $self->{weight}[$i] * $self->{y}[$i] ** 2;
51 213         704 $sumXY += $self->{weight}[$i] * $self->{x}[$i]
52             * $self->{y}[$i];
53             }
54             } else {
55 9         47 for (my $i = 0; $i < $self->{numXY}; ++$i) {
56 100030         125902 $sumX += $self->{x}[$i];
57 100030         140433 $sumY += $self->{y}[$i];
58 100030         118488 $sumXX += $self->{x}[$i] ** 2;
59 100030         126448 $sumYY += $self->{y}[$i] ** 2;
60 100030         270520 $sumXY += $self->{x}[$i] * $self->{y}[$i];
61             }
62             }
63 14         87 return ($sumX, $sumY, $sumXX, $sumYY, $sumXY);
64             }
65              
66             sub durbinWatson {
67             #
68             # Purpose: Return the Durbin-Watson statistic
69             #
70 16     16 1 10623 my $self = shift;
71 16 100       627 unless (defined $self->{durbinWatson}) {
72 15 100       639 $self->regress() or return;
73 14         27 my $sumErrDiff = 0;
74 14         67 my $errorTMinus1 = $self->{y}[0] - ($self->{intercept} + $self->{slope}
75             * $self->{x}[0]);
76 14         60 for (my $i = 1; $i < $self->{numXY}; ++$i) {
77 100229         192638 my $error = $self->{y}[$i] - ($self->{intercept} + $self->{slope}
78             * $self->{x}[$i]);
79 100229         107398 $sumErrDiff += ($error - $errorTMinus1) ** 2;
80 100229         218496 $errorTMinus1 = $error;
81             }
82 14 100       58 $self->{durbinWatson} = $self->sumSqErrors() > 0 ?
83             $sumErrDiff / $self->sumSqErrors() : 0;
84             }
85 15         77 return $self->{durbinWatson};
86             }
87              
88             sub meanSqError {
89             #
90             # Purpose: Return the mean squared error
91             #
92 16     16 1 4082 my $self = shift;
93 16 100       86 unless (defined $self->{meanSqError}) {
94 15 100       46 $self->regress() or return;
95 14         42 $self->{meanSqError} = $self->sumSqErrors() / $self->{numXY};
96             }
97 15         74 return $self->{meanSqError};
98             }
99              
100             sub predictedYs {
101             #
102             # Purpose: Return the predicted y values
103             #
104 28     28 1 951 my $self = shift;
105 28 100       98 unless (defined $self->{predictedYs}) {
106 15 100       52 $self->regress() or return;
107 14         49 $self->{predictedYs} = [];
108 14         64 for (my $i = 0; $i < $self->{numXY}; ++$i) {
109 100243         272968 $self->{predictedYs}[$i] = $self->{intercept}
110             + $self->{slope} * $self->{x}[$i];
111             }
112             }
113 27         48 return @{$self->{predictedYs}};
  27         19763  
114             }
115              
116             sub regress {
117             #
118             # Purpose: Do weighted or unweighted least squares 2-D line fit (if needed)
119             #
120             # Description:
121             # The equations below apply to both the weighted and unweighted fit: the
122             # weights are normalized in setWeights(), so the sum of the weights is
123             # equal to numXY.
124             #
125 131     131 1 188 my $self = shift;
126 131 100       602 return $self->{regressOK} if $self->{doneRegress};
127 24 100       637 unless ($self->{gotData}) {
128 10 50       17 carp "No valid data input - can't do regression" unless $self->{hush};
129 10         57 return 0;
130             }
131 14         52 my ($sumX, $sumY, $sumYY, $sumXY);
132 14         55 ($sumX, $sumY, $self->{sumXX}, $sumYY, $sumXY) = $self->computeSums();
133 14         70 $self->{sumSqDevX} = $self->{sumXX} - $sumX ** 2 / $self->{numXY};
134 14 50       59 if ($self->{sumSqDevX} != 0) {
135 14         176 $self->{sumSqDevY} = $sumYY - $sumY ** 2 / $self->{numXY};
136 14         1227 $self->{sumSqDevXY} = $sumXY - $sumX * $sumY / $self->{numXY};
137 14         39 $self->{slope} = $self->{sumSqDevXY} / $self->{sumSqDevX};
138 14         46 $self->{intercept} = ($sumY - $self->{slope} * $sumX) / $self->{numXY};
139 14         29 $self->{regressOK} = 1;
140             } else {
141 0 0       0 carp "Can't fit line when x values are all equal" unless $self->{hush};
142 0         0 $self->{sumXX} = $self->{sumSqDevX} = undef;
143 0         0 $self->{regressOK} = 0;
144             }
145 14         31 $self->{doneRegress} = 1;
146 14         1164 return $self->{regressOK};
147             }
148              
149             sub residuals {
150             #
151             # Purpose: Return the predicted Y values minus the observed Y values
152             #
153 15     15 1 11521 my $self = shift;
154 15 100       64 unless (defined $self->{residuals}) {
155 14 100       48 $self->regress() or return;
156 13         32 $self->{residuals} = [];
157 13         132 for (my $i = 0; $i < $self->{numXY}; ++$i) {
158 243         1019 $self->{residuals}[$i] = $self->{y}[$i] - ($self->{intercept}
159             + $self->{slope} * $self->{x}[$i]);
160             }
161             }
162 14         25 return @{$self->{residuals}};
  14         101  
163             }
164              
165             sub rSquared {
166             #
167             # Purpose: Return the correlation coefficient
168             #
169 16     16 1 12202 my $self = shift;
170 16 100       73 unless (defined $self->{rSquared}) {
171 15 100       51 $self->regress() or return;
172 14         46 my $denom = $self->{sumSqDevX} * $self->{sumSqDevY};
173 14 50       112 $self->{rSquared} = $denom != 0 ? $self->{sumSqDevXY} ** 2 / $denom : 1;
174             }
175 15         83 return $self->{rSquared};
176             }
177              
178             sub setData {
179             #
180             # Purpose: Initialize (x,y) values and optional weights
181             #
182 29     29 1 1075 my ($self, $x, $y, $weights) = @_;
183 29         132 $self->{doneRegress} = 0;
184 29         386 $self->{x} = $self->{y} = $self->{numXY} = $self->{weight}
185             = $self->{intercept} = $self->{slope} = $self->{rSquared}
186             = $self->{sigma} = $self->{durbinWatson} = $self->{meanSqError}
187             = $self->{sumSqErrors} = $self->{tStatInt} = $self->{tStatSlope}
188             = $self->{predictedYs} = $self->{residuals} = $self->{sumXX}
189             = $self->{sumSqDevX} = $self->{sumSqDevY} = $self->{sumSqDevXY}
190             = undef;
191 29 100       117 if (@$x < 2) {
192 2 50       8 carp "Must input more than one data point!" unless $self->{hush};
193 2         11 return 0;
194             }
195 27         63 $self->{numXY} = @$x;
196 27 100       100 if (ref $x->[0]) {
197 5 100       16 $self->setWeights($y) or return 0;
198 2         6 $self->{x} = [ ];
199 2         7 $self->{y} = [ ];
200 2         8 foreach my $xy (@$x) {
201 8         11 push @{$self->{x}}, $xy->[0];
  8         16  
202 8         10 push @{$self->{y}}, $xy->[1];
  8         14  
203             }
204             } else {
205 22 100       96 if (@$x != @$y) {
206 1 50       4 carp "Length of x and y arrays must be equal!" unless $self->{hush};
207 1         5 return 0;
208             }
209 21 100       75 $self->setWeights($weights) or return 0;
210 16         15519 $self->{x} = [ @$x ];
211 16         10077 $self->{y} = [ @$y ];
212             }
213 18 100       95 if ($self->{validate}) {
214 5 100       28 unless ($self->validData()) {
215 4         11 $self->{x} = $self->{y} = $self->{weights} = $self->{numXY} = undef;
216 4         18 return 0;
217             }
218             }
219 14         31 $self->{gotData} = 1;
220 14         102 return 1;
221             }
222              
223             sub setWeights {
224             #
225             # Purpose: Normalize and initialize line fit weighting factors (private method)
226             #
227 26     26 0 47 my ($self, $weights) = @_;
228 26 100       102 return 1 unless defined $weights;
229 13 100       44 if (@$weights != $self->{numXY}) {
230 2 50       6 carp "Length of weight array must equal length of data array!"
231             unless $self->{hush};
232 2         17 return 0;
233             }
234 11 100       43 if ($self->{validate}) { $self->validWeights($weights) or return 0 }
  3 100       11  
235 9         16 my $sumW = my $numNonZero = 0;
236 9         20 foreach my $weight (@$weights) {
237 221 100       377 if ($weight < 0) {
238 2 50       8 carp "Weights must be non-negative numbers!" unless $self->{hush};
239 2         14 return 0;
240             }
241 219         217 $sumW += $weight;
242 219 100       367 if ($weight != 0) { ++$numNonZero }
  212         275  
243             }
244 7 100       26 if ($numNonZero < 2) {
245 2 50       6 carp "At least two weights must be nonzero!" unless $self->{hush};
246 2         11 return 0;
247             }
248 5         15 my $factor = @$weights / $sumW;
249 5         12 foreach my $weight (@$weights) { $weight *= $factor }
  213         445  
250 5         52 $self->{weight} = [ @$weights ];
251 5         19 return 1;
252             }
253              
254             sub sigma {
255             #
256             # Purpose: Return the estimated homoscedastic standard deviation of the
257             # error term
258             #
259 44     44 1 556 my $self = shift;
260 44 100       137 unless (defined $self->{sigma}) {
261 15 100       58 $self->regress() or return;
262 14 100       66 $self->{sigma} = $self->{numXY} > 2 ?
263             sqrt($self->sumSqErrors() / ($self->{numXY} - 2)) : 0;
264             }
265 43         184 return $self->{sigma};
266             }
267              
268             sub sumSqErrors {
269             #
270             # Purpose: Return the sum of the squared errors (private method)
271             #
272 62     62 0 7574 my $self = shift;
273 62 100       191 unless (defined $self->{sumSqErrors}) {
274 14 50       38 $self->regress() or return;
275 14         68 $self->{sumSqErrors} = $self->{sumSqDevY} - $self->{sumSqDevX}
276             * $self->{slope} ** 2;
277 14 50       60 if ($self->{sumSqErrors} < 0) { $self->{sumSqErrors} = 0 }
  0         0  
278             }
279 62         319 return $self->{sumSqErrors};
280             }
281              
282             sub tStatistics {
283             #
284             # Purpose: Return the T statistics
285             #
286 16     16 1 1111 my $self = shift;
287 16 100 66     93 unless (defined $self->{tStatInt} and defined $self->{tStatSlope}) {
288 15 100       48 $self->regress() or return;
289 14         44 my $biasEstimateInt = $self->sigma() * sqrt($self->{sumXX}
290             / ($self->{sumSqDevX} * $self->{numXY}));
291 14 100       81 $self->{tStatInt} = $biasEstimateInt != 0 ?
292             $self->{intercept} / $biasEstimateInt : 0;
293 14         44 my $biasEstimateSlope = $self->sigma() / sqrt($self->{sumSqDevX});
294 14 100       67 $self->{tStatSlope} = $biasEstimateSlope != 0 ?
295             $self->{slope} / $biasEstimateSlope : 0;
296             }
297 15         67 return ($self->{tStatInt}, $self->{tStatSlope});
298             }
299              
300             sub validData {
301             #
302             # Purpose: Verify that the input x-y data are numeric (private method)
303             #
304 5     5 0 9 my $self = shift;
305 5         20 for (my $i = 0; $i < $self->{numXY}; ++$i) {
306 12 100       55 if (not defined $self->{x}[$i]) {
307 1 50       4 carp "Input x[$i] is not defined" unless $self->{hush};
308 1         3 return 0;
309             }
310 11 100       58 if ($self->{x}[$i] !~
311             /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
312             {
313 1 50       4 carp "Input x[$i] is not a number: $self->{x}[$i]"
314             unless $self->{hush};
315 1         3 return 0;
316             }
317 10 100       25 if (not defined $self->{y}[$i]) {
318 1 50       4 carp "Input y[$i] is not defined" unless $self->{hush};
319 1         3 return 0;
320             }
321 9 100       67 if ($self->{y}[$i] !~
322             /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
323             {
324 1 50       20 carp "Input y[$i] is not a number: $self->{y}[$i]"
325             unless $self->{hush};
326 1         4 return 0;
327             }
328             }
329 1         584 return 1;
330             }
331              
332             sub validWeights {
333             #
334             # Purpose: Verify that the input weights are numeric (private method)
335             #
336 3     3 0 5 my ($self, $weights) = @_;
337 3         25 for (my $i = 0; $i < @$weights; ++$i) {
338 9 100       24 if (not defined $weights->[$i]) {
339 1 50       4 carp "Input weights[$i] is not defined" unless $self->{hush};
340 1         7 return 0;
341             }
342 8 100       80 if ($weights->[$i]
343             !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)
344             {
345 1 50       6 carp "Input weights[$i] is not a number: $weights->[$i]"
346             unless $self->{hush};
347 1         10 return 0;
348             }
349             }
350 1         5 return 1;
351             }
352              
353             sub varianceOfEstimates {
354             #
355             # Purpose: Return the variances in the estimates of the intercept and slope
356             #
357 16     16 1 13271 my $self = shift;
358 16 100 66     170 unless (defined $self->{intercept} and defined $self->{slope}) {
359 1 50       4 $self->regress() or return;
360             }
361 15         55 my @predictedYs = $self->predictedYs();
362 15         2413 my ($s, $sx, $sxx) = (0, 0, 0);
363 15 100       85 if (defined $self->{weight}) {
364 5         31 for (my $i = 0; $i < $self->{numXY}; ++$i) {
365 213         395 my $variance = ($predictedYs[$i] - $self->{y}[$i]) ** 2;
366 213 100       410 next if 0 == $variance;
367 209         257 $s += 1.0 / $variance;
368 209         379 $sx += $self->{weight}[$i] * $self->{x}[$i] / $variance;
369 209         624 $sxx += $self->{weight}[$i] * $self->{x}[$i] ** 2 / $variance;
370             }
371             } else {
372 10         59 for (my $i = 0; $i < $self->{numXY}; ++$i) {
373 100034         160238 my $variance = ($predictedYs[$i] - $self->{y}[$i]) ** 2;
374 100034 100       167010 next if 0 == $variance;
375 100027         96532 $s += 1.0 / $variance;
376 100027         143450 $sx += $self->{x}[$i] / $variance;
377 100027         230197 $sxx += $self->{x}[$i] ** 2 / $variance;
378             }
379             }
380 15         45 my $denominator = ($s * $sxx - $sx ** 2);
381 15 100       57 if (0 == $denominator) {
382 3         9 return;
383             } else {
384 12         2457 return ($sxx / $denominator, $s / $denominator);
385             }
386             }
387              
388             1;
389              
390             __END__