File Coverage

blib/lib/Statistics/Regression.pm
Criterion Covered Total %
statement 108 271 39.8
branch 28 108 25.9
condition 3 13 23.0
subroutine 13 30 43.3
pod 0 25 0.0
total 152 447 34.0


line stmt bran cond sub pod time code
1             package Statistics::Regression;
2              
3             $VERSION = '0.53';
4             my $DATE = "2007/07/07";
5             my $MNAME= "$0::Statistics::Regression";
6              
7 1     1   41902 use strict;
  1         3  
  1         134  
8 1     1   7 use warnings FATAL => qw{ uninitialized };
  1         2  
  1         53  
9              
10 1     1   7 use Carp;
  1         3  
  1         149  
11              
12             ################################################################
13             =pod
14              
15             =head1 NAME
16              
17             Regression.pm - weighted linear regression package (line+plane fitting)
18              
19              
20             =head1 SYNOPSIS
21              
22             use Statistics::Regression;
23              
24             # Create regression object
25             my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] );
26              
27             # Add data points
28             $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
29             $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
30             $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
31             $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
32              
33             or
34              
35             my %d;
36             $d{const} = 1.0; $d{someX}= 5.0; $d{someY}= 2.0; $d{ignored}="anything else";
37             $reg->include( 3.0, \%d ); # names are picked off the Regression specification
38              
39             Please note that *you* must provide the constant if you want one.
40              
41             # Finally, print the result
42             $reg->print();
43              
44             This prints the following:
45              
46             ****************************************************************
47             Regression 'sample regression'
48             ****************************************************************
49             Name Theta StdErr T-stat
50             [0='const'] 0.2950 6.0512 0.05
51             [1='someX'] 0.6723 0.3278 2.05
52             [2='someY'] 1.0688 2.7954 0.38
53              
54             R^2= 0.808, N= 4
55             ****************************************************************
56              
57              
58              
59             The hash input method has the advantage that you can now just
60             fill the observation hashes with all your variables, and use the
61             same code to run regression, changing the regression
62             specification at one and only one spot (the new() invocation).
63             You do not need to change the inputs in the include() statement.
64             For example,
65              
66             my @obs; ## a global variable. observations are like: %oneobs= %{$obs[1]};
67              
68             sub run_regression {
69             my $reg = Statistics::Regression->new( $_[0], $_[2] );
70             foreach my $obshashptr (@obs) { $reg->include( $_[1], $_[3] ); }
71             $reg->print();
72             }
73              
74             run_regression("bivariate regression", $obshashptr->{someY}, [ "const", "someX" ] );
75             run_regression("trivariate regression", $obshashptr->{someY}, [ "const", "someX", "someZ" ] );
76              
77              
78              
79             Of course, you can use the subroutines to do the printing work yourself:
80              
81             my @theta = $reg->theta();
82             my @se = $reg->standarderrors();
83             my $rsq = $reg->rsq();
84             my $adjrsq = $reg->adjrsq();
85             my $ybar = $reg->ybar(); ## the average of the y vector
86             my $sst = $reg->sst(); ## the sum-squares-total
87             my $sigmasq= $reg->sigmasq(); ## the variance of the residual
88             my $k = $reg->k(); ## the number of variables
89             my $n = $reg->n(); ## the number of observations
90              
91             In addition, there are some other helper routines, and a
92             subroutine linearcombination_variance(). If you don't know what
93             this is, don't use it.
94              
95              
96             =head1 BACKGROUND WARNING
97              
98             You should have an understanding of OLS regressions if you want
99             to use this package. You can get this from an introductory
100             college econometrics class and/or from most intermediate college
101             statistics classes. If you do not have this background
102             knowledge, then this package will remain a mystery to you.
103             There is no support for this package--please don't expect any.
104              
105              
106             =head1 DESCRIPTION
107              
108             Regression.pm is a multivariate linear regression package. That
109             is, it estimates the c coefficients for a line-fit of the type
110              
111             y= c(0)*x(0) + c(1)*x(1) + c(2)*x(2) + ... + c(k)*x(k)
112              
113             given a data set of N observations, each with k independent x
114             variables and one y variable. Naturally, N must be greater than
115             k---and preferably considerably greater. Any reasonable
116             undergraduate statistics book will explain what a regression is.
117             Most of the time, the user will provide a constant ('1') as x(0)
118             for each observation in order to allow the regression package to
119             fit an intercept.
120              
121              
122             =head1 ALGORITHM
123              
124             =head2 Original Algorithm (ALGOL-60):
125              
126             W. M. Gentleman, University of Waterloo, "Basic
127             Description For Large, Sparse Or Weighted Linear Least
128             Squares Problems (Algorithm AS 75)," Applied Statistics
129             (1974) Vol 23; No. 3
130              
131             Gentleman's algorithm is a statistical standard. Insertion
132             of a new observation can be done one observation at a time
133             (WITH A WEIGHT!), and still only takes a low quadratic time.
134             The storage space requirement is of quadratic order (in the
135             indep variables). A practically infinite number of observations
136             can easily be processed!
137              
138             =head2 Internal Data Structures
139              
140             R=Rbar is an upperright triangular matrix, kept in normalized
141             form with implicit 1's on the diagonal. D is a diagonal scaling
142             matrix. These correspond to "standard Regression usage" as
143              
144             X' X = R' D R
145              
146             A backsubstitution routine (in thetacov) allows to invert the R
147             matrix (the inverse is upper-right triangular, too!). Call this
148             matrix H, that is H=R^(-1).
149              
150             (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1)
151             = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]'
152              
153              
154             =head1 BUGS/PROBLEMS
155              
156             None known.
157              
158             =over 4
159              
160             =item Perl Problem
161              
162             Unfortunately, perl is unaware of IEEE number representations.
163             This makes it a pain to test whether an observation contains any
164             missing variables (coded as 'NaN' in Regression.pm).
165              
166             =back
167              
168             =for comment
169             pod2html -noindex -title "perl weighted least squares regression package" Regression.pm > Regression.html
170              
171              
172             =head1 VERSION and RECENT CHANGES
173              
174             2007/04/04: Added Coefficient Standard Errors
175              
176             2007/07/01: Added self-test use (if invoked as perl Regression.pm)
177             at the end. cleaned up some print sprintf.
178             changed syntax on new() to eliminate passing K.
179              
180             2007/07/07: allowed passing hash with names to include().
181              
182              
183             =head1 AUTHOR
184              
185             Naturally, Gentleman invented this algorithm. It was adapted
186             by Ivo Welch. Alan Miller (alan\@dmsmelb.mel.dms.CSIRO.AU)
187             pointed out nicer ways to compute the R^2. Ivan Tubert-Brohman
188             helped wrap the module as a standard CPAN distribution.
189              
190             =head1 LICENSE
191              
192             This module is released for free public use under a GPL license.
193              
194             (C) Ivo Welch, 2001,2004, 2007.
195              
196             =cut
197              
198              
199             ################################################################
200             #### let's start with handling of missing data ("nan" or "NaN")
201             ################################################################
202 1     1   6 use constant TINY => 1e-8;
  1         1  
  1         4534  
203             my $nan= "NaN";
204              
205             sub isNaN {
206 16 50   16 0 80 if ($_[0] !~ /[0-9nan]/) { confess "$MNAME:isNaN: definitely not a number in NaN: '$_[0]'"; }
  0         0  
207 16   33     85 return ($_[0]=~ /NaN/i) || ($_[0] != $_[0]);
208             }
209              
210              
211             ################################################################
212             ### my $reg = Statistics::Regression->new($regname, \@var_names)
213             ###
214             ### Receives the number of variables on each observations (i.e.,
215             ### an integer) and returns the blessed data structure as a
216             ### Statistics::Regression object. Also takes an optional name
217             ### for this regression to remember, as well as a reference to a
218             ### k*1 array of names for the X coefficients.
219             ###
220             ### I have now made it mandatory to give some names.
221             ###
222             ################################################################
223             sub new {
224 1 50   1 0 13 my $classname= shift; (!ref($classname)) or confess "$MNAME:new: bad class call to new ($classname).\n";
  1         3  
225 1   50     7 my $regname= shift || "no-name";
226 1         2 my $xnameptr= shift;
227              
228 1 50       3 (defined($regname)) or confess "$MNAME:new: bad name in for regression. no undef allowed.\n";
229 1 50       4 (!ref($regname)) or confess "$MNAME:new: bad name in for regression.\n";
230 1 50       2 (defined($xnameptr)) or confess "$MNAME:new: You must provide variable names, because this tells me the number of columns. no undef allowed.\n";
231 1 50       5 (ref($xnameptr) eq "ARRAY") or confess "$MNAME:new: bad xnames for regression. Must be pointer.\n";
232              
233 1         3 my $K= (@{$xnameptr});
  1         2  
234              
235 1 50       4 if (!defined($K)) { confess "$MNAME:new: cannot determine the number of variables"; }
  0         0  
236 1 50       3 if ($K<=1) { confess "$MNAME:new: Cannot run a regression without at least two variables."; }
  0         0  
237              
238             sub zerovec {
239 3     3 0 4 my @rv;
240 3         8 for (my $i=0; $i<=$_[0]; ++$i) { $rv[$i]=0; }
  16         52  
241 3         30 return \@rv;
242             }
243              
244 1         5 bless {
245             k => $K,
246             regname => $regname,
247             xnames => $xnameptr,
248              
249             # constantly updated
250             n => 0,
251             sse => 0,
252             syy => 0,
253             sy => 0,
254             wghtn => 0,
255             d => zerovec($K),
256             thetabar => zerovec($K),
257             rbarsize => ($K+1)*$K/2+1,
258             rbar => zerovec(($K+1)*$K/2+1),
259              
260             # other constants
261             neverabort => 0,
262              
263             # computed on demand
264             theta => undef,
265             sigmasq => undef,
266             rsq => undef,
267             adjrsq => undef
268             }, $classname;
269             }
270              
271              
272             ################################################################
273             ### $reg->include( $y, [ $x1, $x2, $x3 ... $xk ], $weight );
274             ###
275             ### Add one new observation. The weight is optional. Note that
276             ### inclusion with a weight of -1 can be used to delete an
277             ### observation.
278             ###
279             ### The error checking and transfer of arguments is clutzy, but
280             ### works. if I had POSIX assured, I could do better number
281             ### checking. right now, I don't do any.
282             ###
283             ### Returns the number of observations so far included.
284             ################################################################
285             sub include {
286 4     4 0 20 my $this = shift;
287 4         5 my $yelement= shift;
288 4         6 my $xin= shift;
289 4   50     17 my $weight= shift || 1.0;
290              
291             # modest input checking;
292 4 50       9 (ref($this)) or confess "$MNAME:include: bad class call to include.\n";
293 4 50       8 (defined($yelement)) or confess "$MNAME:include: bad call for y to include. no undef allowed.\n";
294 4 50       20 (!ref($yelement)) or confess "$MNAME:include: bad call for y to include. need scalar.\n";
295 4 50       10 (defined($xin)) or confess "$MNAME:include: bad call for x to include. no undef allowed.\n";
296 4 50       9 (ref($xin)) or confess "$MNAME:include: bad call for x to include. need reference.\n";
297 4 50       7 (!ref($weight)) or confess "$MNAME:include: bad call for weight to include. need scalar.\n";
298              
299              
300             # omit observations with missing observations;
301 4 50       7 (defined($yelement)) or confess "$MNAME:include: you must give a y value (predictor).";
302 4 50       8 (isNaN($yelement)) and return $this->{n}; # ignore this observation;
303             ## should check for number, not string
304              
305              
306             # check and transfer the X vector
307 4         5 my @xrow;
308 4 50       10 if (ref($xin) eq "ARRAY") { @xrow= @{$xin}; }
  4         4  
  4         9  
309             else {
310 0         0 my $xctr=0;
311 0         0 foreach my $nm (@{$this->{xnames}}) {
  0         0  
312 0 0       0 (defined($xin->{$nm})) or confess "$MNAME:include: Variable '$nm' needs to be set in hash.\n";
313 0         0 $xrow[$xctr]= $xin->{$nm};
314 0         0 ++$xctr;
315             }
316             }
317              
318 4         5 my @xcopy;
319 4         38 for (my $i=1; $i<=$this->{k}; ++$i) {
320 12 50       24 (defined($xrow[$i-1]))
321             or confess "$MNAME:include: Internal Error: at N=".($this->{n}).", the x[".($i-1)."] is undef. use NaN for missing.";
322 12 50       19 (isNaN($xrow[$i-1])) and return $this->{n};
323 12         42 $xcopy[$i]= $xrow[$i-1];
324             ## should check for number, not string
325             }
326              
327             ################ now comes the real routine
328              
329 4         8 $this->{syy}+= ($weight*($yelement*$yelement));
330 4         5 $this->{sy}+= ($weight*($yelement));
331 4 50       8 if ($weight>=0.0) { ++$this->{n}; } else { --$this->{n}; }
  4         5  
  0         0  
332              
333 4         5 $this->{wghtn}+= $weight;
334              
335 4         16 for (my $i=1; $i<=$this->{k};++$i) {
336 11 100       19 if ($weight==0.0) { return $this->{n}; }
  2         8  
337 9 50       22 if (abs($xcopy[$i])>(TINY)) {
338 9         12 my $xi=$xcopy[$i];
339              
340 9         11 my $di=$this->{d}->[$i];
341 9         21 my $dprimei=$di+$weight*($xi*$xi);
342 9         12 my $cbar= $di/$dprimei;
343 9         12 my $sbar= $weight*$xi/$dprimei;
344 9         10 $weight*=($cbar);
345 9         13 $this->{d}->[$i]=$dprimei;
346 9         21 my $nextr=int( (($i-1)*( (2.0*$this->{k}-$i))/2.0+1) );
347 9 50       18 if (!($nextr<=$this->{rbarsize}) ) { confess "$MNAME:include: Internal Error 2"; }
  0         0  
348 9         16 my $xk;
349 9         17 for (my $kc=$i+1;$kc<=$this->{k};++$kc) {
350 11         10 $xk=$xcopy[$kc]; $xcopy[$kc]=$xk-$xi*$this->{rbar}->[$nextr];
  11         18  
351 11         19 $this->{rbar}->[$nextr]= $cbar * $this->{rbar}->[$nextr]+$sbar*$xk;
352 11         23 ++$nextr;
353             }
354 9         8 $xk=$yelement; $yelement-= $xi*$this->{thetabar}->[$i];
  9         13  
355 9         29 $this->{thetabar}->[$i]= $cbar*$this->{thetabar}->[$i]+$sbar*$xk;
356             }
357             }
358 2         4 $this->{sse}+=$weight*($yelement*$yelement);
359              
360             # indicate that Theta is garbage now
361 2         3 $this->{theta}= undef;
362 2         12 $this->{sigmasq}= undef; $this->{rsq}= undef; $this->{adjrsq}= undef;
  2         2  
  2         4  
363              
364 2         6 return $this->{n};
365             }
366              
367              
368             ################################################################
369             ###
370             ### $reg->rsq(), $reg->adjrsq(), $reg->sigmasq(), $reg->ybar(),
371             ### $reg->sst(), $reg->k(), $reg->n()
372             ###
373             ### These methods provide common auxiliary information. rsq,
374             ### adjrsq, sigmasq, sst, and ybar have not been checked but are
375             ### likely correct. The results are stored for later usage,
376             ### although this is somewhat unnecessary because the
377             ### computation is so simple anyway.
378             ################################################################
379              
380             sub rsq {
381 1     1 0 7 my $this= shift;
382 1         5 return $this->{rsq}= 1.0- $this->{sse} / $this->sst();
383             }
384              
385             sub adjrsq {
386 0     0 0 0 my $this= shift;
387 0         0 return $this->{adjrsq}= 1.0- (1.0- $this->rsq())*($this->{n}-1)/($this->{n} - $this->{k});
388             }
389              
390             sub sigmasq {
391 0     0 0 0 my $this= shift;
392 0 0       0 return $this->{sigmasq}= ($this->{n}<=$this->{k}) ? "Inf" : ($this->{sse}/($this->{n} - $this->{k}));
393             }
394              
395             sub ybar {
396 1     1 0 2 my $this= shift;
397 1         22 return $this->{ybar}= $this->{sy}/$this->{wghtn};
398             }
399              
400             sub sst {
401 1     1 0 2 my $this= shift;
402 1         5 return $this->{sst}= ($this->{syy} - $this->{wghtn}*($this->ybar())**2);
403             }
404              
405             sub k {
406 0     0 0 0 my $this= shift;
407 0         0 return $this->{k};
408             }
409             sub n {
410 0     0 0 0 my $this= shift;
411 0         0 return $this->{n};
412             }
413              
414              
415              
416             ################################################################
417             ### $reg->print() [no arguments!]
418             ###
419             ### prints the estimated coefficients, and R^2 and N. For an
420             ### example see the Synopsis.
421             ################################################################
422             sub print {
423 0     0 0 0 my $this= shift;
424              
425 0         0 print "****************************************************************\n";
426 0         0 print "Regression '$this->{regname}'\n";
427 0         0 print "****************************************************************\n";
428              
429 0         0 my $theta= $this->theta();
430 0         0 my @standarderrors= $this->standarderrors();
431              
432 0         0 printf "%-15s\t%12s\t%12s\t%7s\n", "Name", "Theta", "StdErr", "T-stat";
433 0         0 for (my $i=0; $i< $this->k(); ++$i) {
434 0 0       0 my $name= "[$i".(defined($this->{xnames}->[$i]) ? "='$this->{xnames}->[$i]'":"")."]";
435 0         0 printf "%-15s\t", $name;
436 0         0 printf "%12.4f\t", $theta->[$i];
437 0         0 printf "%12.4f\t", $standarderrors[$i];
438 0         0 printf "%7.2f", ($theta->[$i]/$standarderrors[$i]);
439 0         0 printf "\n";
440             }
441              
442 0         0 print "\nR^2= ".sprintf("%.3f", $this->rsq()).", N= ".$this->n().", K= ".$this->k()."\n";
443 0         0 print "****************************************************************\n";
444             }
445              
446              
447              
448             ################################################################
449             ### $theta = $reg->theta or @theta = $reg->theta
450             ###
451             ### This is the work horse. It estimates and returns the vector
452             ### of coefficients. In scalar context returns an array
453             ### reference; in list context it returns the list of
454             ### coefficients.
455             ################################################################
456             sub theta {
457 1     1 0 8 my $this= shift;
458              
459 1 50       4 if (defined($this->{theta})) {
460 0 0       0 return wantarray ? @{$this->{theta}} : $this->{theta};
  0         0  
461             }
462              
463 1 50       5 if ($this->{n} < $this->{k}) { return; }
  0         0  
464 1         6 for (my $i=($this->{k}); $i>=1; --$i) {
465 3         9 $this->{theta}->[$i]= $this->{thetabar}->[$i];
466 3         8 my $nextr= int (($i-1)*((2.0*$this->{k}-$i))/2.0+1);
467 3 50       9 if (!($nextr<=$this->{rbarsize})) { confess "$MNAME:theta: Internal Error 3"; }
  0         0  
468 3         25 for (my $kc=$i+1;$kc<=$this->{k};++$kc) {
469 3         8 $this->{theta}->[$i]-=($this->{rbar}->[$nextr]*$this->{theta}->[$kc]);
470 3         12 ++$nextr;
471             }
472             }
473              
474              
475 1         3 my $ref = $this->{theta}; shift(@$ref); # we are counting from 0
  1         2  
476              
477             # if in a scalar context, otherwise please return the array directly
478 1 50       5 wantarray ? @{$this->{theta}} : $this->{theta};
  1         6  
479             }
480              
481             ################################################################
482             ### @se= $reg->standarderrors()
483             ###
484             ### This is the most difficult routine. Take it on faith.
485             ###
486             ### R=Rbar is an upperright triangular matrix, kept in normalized
487             ### form with implicit 1's on the diagonal. D is a diagonal scaling
488             ### matrix. These correspond to "standard Regression usage" as
489             ###
490             ### X' X = R' D R
491             ###
492             ### A backsubstitution routine (in thetacov) allows to invert the R
493             ### matrix (the inverse is upper-right triangular, too!). Call this
494             ### matrix H, that is H=R^(-1).
495             ###
496             ### (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1)
497             ### = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]'
498             ###
499             ### Let's work this for our example, where
500             ###
501             ### $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
502             ### $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
503             ### $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
504             ### $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
505             ###
506             ### For debugging, the X'X matrix for our example is
507             ### 4, 50, 3
508             ### 50 1116 29
509             ### 3 29 9
510             ###
511             ### Its inverse is
512             ### 0.70967 -0.027992 -0.146360
513             ### -0.02799 0.002082 0.002622
514             ### -0.14636 0.002622 0.151450
515             ###
516             ### Internally, this is kept as follows
517             ###
518             ### R is 1, 0, 0
519             ### 12.5 1 0
520             ### 0.75 -0.0173 1
521             ###
522             ### d is the diagonal(4,491,6.603) matrix, which as 1/sqrt becomes dhi= 0.5, 0.04513, 0.3892
523             ###
524             ### R * d * R' is indeed the X' X matrix.
525             ###
526             ### The inverse of R is
527             ###
528             ### 1, 0, 0
529             ### -12.5 1 0
530             ### -0.9664 0.01731 1
531             ###
532             ### in R, t(solve(R) %*% dhi) %*% t( t(solve(R) %*% dhi) ) is the correct inverse.
533             ###
534             ### The routine has a debug switch which makes it come out very verbose.
535             ################################################################
536             my $debug=0;
537              
538             sub standarderrors {
539 0     0 0   my $this= shift;
540 0           our $K= $this->{k}; # convenience
541              
542 0           our @u;
543             sub ui {
544 0 0   0 0   if ($debug) {
545 0 0 0       ($_[0]<1)||($_[0]>$K) and confess "$MNAME:standarderrors: bad index 0 $_[0]\n";
546 0 0 0       ($_[1]<1)||($_[1]>$K) and confess "$MNAME:standarderrors: bad index 1 $_[0]\n";
547             }
548 0           return (($K*($_[0]-1))+($_[1]-1));
549             }
550             sub giveuclear {
551 0     0 0   for (my $i=0; $i<($K**2); ++$i) { $u[$i]=0.0; }
  0            
552 0 0         return (wantarray) ? @u : \@u;
553             }
554              
555 0     0 0   sub u { return $u[ui($_[0], $_[1])]; }
556 0     0 0   sub setu { return $u[ui($_[0], $_[1])]= $_[2]; }
557 0     0 0   sub add2u { return $u[ui($_[0], $_[1])]+= $_[2]; }
558 0     0 0   sub mult2u { return $u[ui($_[0], $_[1])]*= $_[2]; }
559              
560 0 0         (defined($K)) or confess "$MNAME:standarderrors: Internal Error: I forgot the number of variables.\n";
561 0 0         if ($debug) {
562 0           print "The Start Matrix is:\n";
563 0           for (my $i=1; $i<=$K; ++$i) {
564 0           print "[$i]:\t";
565 0           for (my $j=1; $j<=$K; ++$j) {
566 0           print $this->rbr($i, $j)."\t";
567             }
568 0           print "\n";
569             }
570 0           print "The Start d vector is:\n";
571 0           for (my $i=1; $i<=$K; ++$i) {
572 0           print "".$this->{d}[$i]."\t";
573             }
574 0           print "\n";
575             }
576              
577             sub rbrindex {
578 0 0   0 0   return ($_[0] == $_[1]) ? -9 :
    0          
579             ($_[0]>$_[1]) ? -8 :
580             ((($_[0]-1.0)* (2.0*$K-$_[0])/2.0+1.0) + $_[1] - 1 - $_[0] ); }
581              
582             # now a real member routine;
583             sub rbr {
584 0     0 0   my $this= shift;
585 0 0         return ($_[0] == $_[1]) ? 1 : ( ($_[0]>$_[1]) ? 0 : ($this->{rbar}[rbrindex($_[0],$_[1])]));
    0          
586             }
587              
588 0           my $u= giveuclear();
589              
590 0           for (my $j=$K; $j>=1; --$j) {
591 0           setu($j,$j, 1.0/($this->rbr($j,$j)));
592 0           for (my $k=$j-1; $k>=1; --$k) {
593 0           setu($k,$j,0);
594 0           for (my $i=$k+1; $i<=$j; ++$i) { add2u($k,$j, $this->rbr($k,$i)*u($i,$j)); }
  0            
595 0           mult2u($k,$j, (-1.0)/$this->rbr($k,$k));
596             }
597             }
598              
599 0 0         if ($debug) {
600 0           print "The Inverse Matrix of R is:\n";
601 0           for (my $i=1; $i<=$K; ++$i) {
602 0           print "[$i]:\t";
603 0           for (my $j=1; $j<=$K; ++$j) {
604 0           print $u[ui($i,$j)]."\t";
605             }
606 0           print "\n";
607             }
608             }
609              
610 0           for (my $i=1;$i<=$K;++$i) {
611 0           for (my $j=1;$j<=$K;++$j) {
612 0 0         if (abs($this->{d}[$j])<TINY) {
613 0           mult2u($i,$j, sqrt(1.0/TINY));
614 0 0         if (abs($this->{d}[$j])==0.0) {
615 0 0         if ($this->{neverabort}) {
616 0           for (my $i=0; $i<($K**2); ++$i) { $u[$i]= "NaN"; }
  0            
617 0           return undef;
618             }
619 0           else { confess "$MNAME:standarderrors: I cannot compute the theta-covariance matrix for variable $j ".($this->{d}[$j])."\n"; }
620             }
621             }
622 0           else { mult2u($i,$j, sqrt(1.0/$this->{d}[$j])); }
623             }
624             }
625              
626 0 0         if ($debug) {
627 0           print "The Inverse Matrix of R multipled by D^(-1/2) is:\n";
628 0           for (my $i=1; $i<=$K; ++$i) {
629 0           print "[$i]:\t";
630 0           for (my $j=1; $j<=$K; ++$j) {
631 0           print $u[ui($i,$j)]."\t";
632             }
633 0           print "\n";
634             }
635             }
636              
637 0 0         $this->{sigmasq}= ($this->{n}<=$K) ? "Inf" : ($this->{sse}/($this->{n} - $K));
638 0           my @xpxinv;
639 0           for (my $i=1;$i<=$K; ++$i) {
640 0           for (my $j=$i;$j<=$K;++$j) {
641 0           my $indexij= ui($i,$j);
642 0           $xpxinv[$indexij]= 0.0;
643 0           for (my $k=1;$k<=$K;++$k) {
644 0           $xpxinv[$indexij] += $u[ui($i,$k)]*$u[ui($j,$k)];
645             }
646 0           $xpxinv[ui($j,$i)]= $xpxinv[$indexij]; # this is symmetric
647             }
648             }
649              
650 0 0         if ($debug) {
651 0           print "The full inverse matrix of X'X is:\n";
652 0           for (my $i=1; $i<=$K; ++$i) {
653 0           print "[$i]:\t";
654 0           for (my $j=1; $j<=$K; ++$j) {
655 0           print $xpxinv[ui($i,$j)]."\t";
656             }
657 0           print "\n";
658             }
659 0           print "The sigma^2 is ".$this->{sigmasq}."\n";
660             }
661              
662             ## 99% of the usage here will be to print the diagonal elements of sqrt ( (X' X) sigma^2 )
663             ## so, let's make this our first returned object;
664              
665 0           my @secoefs;
666 0           for (my $i=1; $i<=$K; ++$i) {
667 0           $secoefs[$i-1]= sqrt($xpxinv[ui($i,$i)] * $this->{sigmasq});
668             }
669 0 0         if ($debug) { for (my $i=0; $i<$K; ++$i) { print " $secoefs[$i] "; } print "\n"; }
  0            
  0            
  0            
670              
671             # the following are clever return methods; if the user goes over the secoefs,
672             # almost surely an error will result, because he will run into xpxinv. For special
673             # usage, however, xpxinv may still be useful.
674              
675 0           return ( @secoefs, \@xpxinv, $this->sigmasq );
676             }
677              
678              
679             ################################
680             sub linearcombination_variance {
681 0     0 0   my $this= shift;
682 0           our $K= $this->{k}; # convenience
683              
684 0           my @linear= @_;
685              
686 0 0         ($#linear+1 == $K) or confess "$MNAME:linearcombination_variance: ".
687             "Sorry, you must give a vector of length $K, not ".($#linear+1)."\n";
688              
689 0           my @allback= $this->standarderrors(); # compute everything we need;
690              
691 0           my $xpxinv= $allback[$this->{k}];
692 0           my $sigmasq= $allback[$this->{k}+1];
693              
694 0           my $sum=0;
695 0           for (my $i=1; $i<=$K; ++$i) {
696 0           for (my $j=1; $j<=$K; ++$j) {
697 0           $sum+= $linear[$i-1]*$linear[$j-1]*$xpxinv->[ui($i,$j)];
698             }
699             }
700 0           $sum*=$sigmasq;
701 0           return $sum;
702             }
703              
704              
705             ################################################################
706             ### sub dump() was used internally for debugging.
707             ################################################################
708             sub dump {
709 0     0 0   my $this= $_[0];
710 0           print "****************************************************************\n";
711 0           print "Regression '$this->{regname}'\n";
712 0           print "****************************************************************\n";
713             sub print1val {
714 1     1   11 no strict;
  1         2  
  1         559  
715 0 0   0 0   print "$_[1]($_[2])=\t". ((defined($_[0]->{ $_[2] }) ? $_[0]->{ $_[2] } : "intentionally undef"));
716              
717 0           my $ref=$_[0]->{ $_[2] };
718              
719 0 0         if (ref($ref) eq 'ARRAY') {
    0          
720 0           my $arrayref= $ref;
721 0           print " $#$arrayref+1 elements:\n";
722 0 0         if ($#$arrayref>30) {
723 0           print "\t";
724 0           for(my $i=0; $i<$#$arrayref+1; ++$i) { print "$i='$arrayref->[$i]';"; }
  0            
725 0           print "\n";
726             }
727             else {
728 0           for(my $i=0; $i<$#$arrayref+1; ++$i) { print "\t$i=\t'$arrayref->[$i]'\n"; }
  0            
729             }
730             }
731             elsif (ref($ref) eq 'HASH') {
732 0           my $hashref= $ref;
733 0           print " ".scalar(keys(%$hashref))." elements\n";
734 0           while (my ($key, $val) = each(%$hashref)) {
735 0           print "\t'$key'=>'$val';\n";
736             }
737             }
738             else {
739 0           print " [was scalar]\n"; }
740             }
741              
742 0           while (my ($key, $val) = each(%$this)) {
743 0           $this->print1val($key, $key);
744             }
745 0           print "****************************************************************\n";
746             }
747              
748             ################################################################
749             ### The Test Program. Invoke as "perl Regression.pm".
750             ################################################################
751              
752              
753             if ($0 eq "Regression.pm") {
754              
755             # Create regression object
756             my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] );
757              
758             # Add data points
759             $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
760             $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
761             $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
762              
763             my %inhash= ( const => 1.0, someX => 11.0, someY => 2.0, ignored => "ignored" );
764             $reg->include( 15.0, \%inhash );
765             # $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
766              
767             # Print the result
768             $reg->print();
769             }
770              
771              
772             1;