File Coverage

blib/lib/Statistics/Regression.pm
Criterion Covered Total %
statement 108 271 39.8
branch 28 108 25.9
condition 3 13 23.0
subroutine 13 30 43.3
pod 0 25 0.0
total 152 447 34.0


line stmt bran cond sub pod time code
1             package Statistics::Regression;
2              
3             $VERSION = '0.53';
4             my $DATE = "2007/07/07";
5             my $MNAME= "$0::Statistics::Regression";
6              
7 1     1   41902 use strict;
  1         3  
  1         134  
8 1     1   7 use warnings FATAL => qw{ uninitialized };
  1         2  
  1         53  
9              
10 1     1   7 use Carp;
  1         3  
  1         149  
11              
12             ################################################################
13             =pod
14              
15             =head1 NAME
16              
17             Regression.pm - weighted linear regression package (line+plane fitting)
18              
19              
20             =head1 SYNOPSIS
21              
22             use Statistics::Regression;
23              
24             # Create regression object
25             my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] );
26              
27             # Add data points
28             $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
29             $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
30             $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
31             $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
32              
33             or
34              
35             my %d;
36             $d{const} = 1.0; $d{someX}= 5.0; $d{someY}= 2.0; $d{ignored}="anything else";
37             $reg->include( 3.0, \%d ); # names are picked off the Regression specification
38              
39             Please note that *you* must provide the constant if you want one.
40              
41             # Finally, print the result
42             $reg->print();
43              
44             This prints the following:
45              
46             ****************************************************************
47             Regression 'sample regression'
48             ****************************************************************
49             Name Theta StdErr T-stat
50             [0='const'] 0.2950 6.0512 0.05
51             [1='someX'] 0.6723 0.3278 2.05
52             [2='someY'] 1.0688 2.7954 0.38
53              
54             R^2= 0.808, N= 4
55             ****************************************************************
56              
57              
58              
59             The hash input method has the advantage that you can now just
60             fill the observation hashes with all your variables, and use the
61             same code to run regression, changing the regression
62             specification at one and only one spot (the new() invocation).
63             You do not need to change the inputs in the include() statement.
64             For example,
65              
66             my @obs; ## a global variable. observations are like: %oneobs= %{$obs[1]};
67              
68             sub run_regression {
69             my $reg = Statistics::Regression->new( $_[0], $_[2] );
70             foreach my $obshashptr (@obs) { $reg->include( $_[1], $_[3] ); }
71             $reg->print();
72             }
73              
74             run_regression("bivariate regression", $obshashptr->{someY}, [ "const", "someX" ] );
75             run_regression("trivariate regression", $obshashptr->{someY}, [ "const", "someX", "someZ" ] );
76              
77              
78              
79             Of course, you can use the subroutines to do the printing work yourself:
80              
81             my @theta = $reg->theta();
82             my @se = $reg->standarderrors();
83             my $rsq = $reg->rsq();
84             my $adjrsq = $reg->adjrsq();
85             my $ybar = $reg->ybar(); ## the average of the y vector
86             my $sst = $reg->sst(); ## the sum-squares-total
87             my $sigmasq= $reg->sigmasq(); ## the variance of the residual
88             my $k = $reg->k(); ## the number of variables
89             my $n = $reg->n(); ## the number of observations
90              
91             In addition, there are some other helper routines, and a
92             subroutine linearcombination_variance(). If you don't know what
93             this is, don't use it.
94              
95              
96             =head1 BACKGROUND WARNING
97              
98             You should have an understanding of OLS regressions if you want
99             to use this package. You can get this from an introductory
100             college econometrics class and/or from most intermediate college
101             statistics classes. If you do not have this background
102             knowledge, then this package will remain a mystery to you.
103             There is no support for this package--please don't expect any.
104              
105              
106             =head1 DESCRIPTION
107              
108             Regression.pm is a multivariate linear regression package. That
109             is, it estimates the c coefficients for a line-fit of the type
110              
111             y= c(0)*x(0) + c(1)*x(1) + c(2)*x(2) + ... + c(k)*x(k)
112              
113             given a data set of N observations, each with k independent x
114             variables and one y variable. Naturally, N must be greater than
115             k---and preferably considerably greater. Any reasonable
116             undergraduate statistics book will explain what a regression is.
117             Most of the time, the user will provide a constant ('1') as x(0)
118             for each observation in order to allow the regression package to
119             fit an intercept.
120              
121              
122             =head1 ALGORITHM
123              
124             =head2 Original Algorithm (ALGOL-60):
125              
126             W. M. Gentleman, University of Waterloo, "Basic
127             Description For Large, Sparse Or Weighted Linear Least
128             Squares Problems (Algorithm AS 75)," Applied Statistics
129             (1974) Vol 23; No. 3
130              
131             Gentleman's algorithm is a statistical standard. Insertion
132             of a new observation can be done one observation at a time
133             (WITH A WEIGHT!), and still only takes a low quadratic time.
134             The storage space requirement is of quadratic order (in the
135             indep variables). A practically infinite number of observations
136             can easily be processed!
137              
138             =head2 Internal Data Structures
139              
140             R=Rbar is an upperright triangular matrix, kept in normalized
141             form with implicit 1's on the diagonal. D is a diagonal scaling
142             matrix. These correspond to "standard Regression usage" as
143              
144             X' X = R' D R
145              
146             A backsubstitution routine (in thetacov) allows to invert the R
147             matrix (the inverse is upper-right triangular, too!). Call this
148             matrix H, that is H=R^(-1).
149              
150             (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1)
151             = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]'
152              
153              
154             =head1 BUGS/PROBLEMS
155              
156             None known.
157              
158             =over 4
159              
160             =item Perl Problem
161              
162             Unfortunately, perl is unaware of IEEE number representations.
163             This makes it a pain to test whether an observation contains any
164             missing variables (coded as 'NaN' in Regression.pm).
165              
166             =back
167              
168             =for comment
169             pod2html -noindex -title "perl weighted least squares regression package" Regression.pm > Regression.html
170              
171              
172             =head1 VERSION and RECENT CHANGES
173              
174             2007/04/04: Added Coefficient Standard Errors
175              
176             2007/07/01: Added self-test use (if invoked as perl Regression.pm)
177             at the end. cleaned up some print sprintf.
178             changed syntax on new() to eliminate passing K.
179              
180             2007/07/07: allowed passing hash with names to include().
181              
182              
183             =head1 AUTHOR
184              
185             Naturally, Gentleman invented this algorithm. It was adapted
186             by Ivo Welch. Alan Miller (alan\@dmsmelb.mel.dms.CSIRO.AU)
187             pointed out nicer ways to compute the R^2. Ivan Tubert-Brohman
188             helped wrap the module as a standard CPAN distribution.
189              
190             =head1 LICENSE
191              
192             This module is released for free public use under a GPL license.
193              
194             (C) Ivo Welch, 2001,2004, 2007.
195              
196             =cut
197              
198              
199             ################################################################
200             #### let's start with handling of missing data ("nan" or "NaN")
201             ################################################################
202 1     1   6 use constant TINY => 1e-8;
  1         1  
  1         4534  
203             my $nan= "NaN";
204              
205             sub isNaN {
206 16 50   16 0 80 if ($_[0] !~ /[0-9nan]/) { confess "$MNAME:isNaN: definitely not a number in NaN: '$_[0]'"; }
  0         0  
207 16   33     85 return ($_[0]=~ /NaN/i) || ($_[0] != $_[0]);
208             }
209              
210              
211             ################################################################
212             ### my $reg = Statistics::Regression->new($regname, \@var_names)
213             ###
214             ### Receives the number of variables on each observations (i.e.,
215             ### an integer) and returns the blessed data structure as a
216             ### Statistics::Regression object. Also takes an optional name
217             ### for this regression to remember, as well as a reference to a
218             ### k*1 array of names for the X coefficients.
219             ###
220             ### I have now made it mandatory to give some names.
221             ###
222             ################################################################
223             sub new {
224 1 50   1 0 13 my $classname= shift; (!ref($classname)) or confess "$MNAME:new: bad class call to new ($classname).\n";
  1         3  
225 1   50     7 my $regname= shift || "no-name";
226 1         2 my $xnameptr= shift;
227              
228 1 50       3 (defined($regname)) or confess "$MNAME:new: bad name in for regression. no undef allowed.\n";
229 1 50       4 (!ref($regname)) or confess "$MNAME:new: bad name in for regression.\n";
230 1 50       2 (defined($xnameptr)) or confess "$MNAME:new: You must provide variable names, because this tells me the number of columns. no undef allowed.\n";
231 1 50       5 (ref($xnameptr) eq "ARRAY") or confess "$MNAME:new: bad xnames for regression. Must be pointer.\n";
232              
233 1         3 my $K= (@{$xnameptr});
  1         2  
234              
235 1 50       4 if (!defined($K)) { confess "$MNAME:new: cannot determine the number of variables"; }
  0         0  
236 1 50       3 if ($K<=1) { confess "$MNAME:new: Cannot run a regression without at least two variables."; }
  0         0  
237              
238             sub zerovec {
239 3     3 0 4 my @rv;
240 3         8 for (my $i=0; $i<=$_[0]; ++$i) { $rv[$i]=0; }
  16         52  
241 3         30 return \@rv;
242             }
243              
244 1         5 bless {
245             k => $K,
246             regname => $regname,
247             xnames => $xnameptr,
248              
249             # constantly updated
250             n => 0,
251             sse => 0,
252             syy => 0,
253             sy => 0,
254             wghtn => 0,
255             d => zerovec($K),
256             thetabar => zerovec($K),
257             rbarsize => ($K+1)*$K/2+1,
258             rbar => zerovec(($K+1)*$K/2+1),
259              
260             # other constants
261             neverabort => 0,
262              
263             # computed on demand
264             theta => undef,
265             sigmasq => undef,
266             rsq => undef,
267             adjrsq => undef
268             }, $classname;
269             }
270              
271              
272             ################################################################
273             ### $reg->include( $y, [ $x1, $x2, $x3 ... $xk ], $weight );
274             ###
275             ### Add one new observation. The weight is optional. Note that
276             ### inclusion with a weight of -1 can be used to delete an
277             ### observation.
278             ###
279             ### The error checking and transfer of arguments is clutzy, but
280             ### works. if I had POSIX assured, I could do better number
281             ### checking. right now, I don't do any.
282             ###
283             ### Returns the number of observations so far included.
284             ################################################################
285             sub include {
286 4     4 0 20 my $this = shift;
287 4         5 my $yelement= shift;
288 4         6 my $xin= shift;
289 4   50     17 my $weight= shift || 1.0;
290              
291             # modest input checking;
292 4 50       9 (ref($this)) or confess "$MNAME:include: bad class call to include.\n";
293 4 50       8 (defined($yelement)) or confess "$MNAME:include: bad call for y to include. no undef allowed.\n";
294 4 50       20 (!ref($yelement)) or confess "$MNAME:include: bad call for y to include. need scalar.\n";
295 4 50       10 (defined($xin)) or confess "$MNAME:include: bad call for x to include. no undef allowed.\n";
296 4 50       9 (ref($xin)) or confess "$MNAME:include: bad call for x to include. need reference.\n";
297 4 50       7 (!ref($weight)) or confess "$MNAME:include: bad call for weight to include. need scalar.\n";
298              
299              
300             # omit observations with missing observations;
301 4 50       7 (defined($yelement)) or confess "$MNAME:include: you must give a y value (predictor).";
302 4 50       8 (isNaN($yelement)) and return $this->{n}; # ignore this observation;
303             ## should check for number, not string
304              
305              
306             # check and transfer the X vector
307 4         5 my @xrow;
308 4 50       10 if (ref($xin) eq "ARRAY") { @xrow= @{$xin}; }
  4         4  
  4         9  
309             else {
310 0         0 my $xctr=0;
311 0         0 foreach my $nm (@{$this->{xnames}}) {
  0         0  
312 0 0       0 (defined($xin->{$nm})) or confess "$MNAME:include: Variable '$nm' needs to be set in hash.\n";
313 0         0 $xrow[$xctr]= $xin->{$nm};
314 0         0 ++$xctr;
315             }
316             }
317              
318 4         5 my @xcopy;
319 4         38 for (my $i=1; $i<=$this->{k}; ++$i) {
320 12 50       24 (defined($xrow[$i-1]))
321             or confess "$MNAME:include: Internal Error: at N=".($this->{n}).", the x[".($i-1)."] is undef. use NaN for missing.";
322 12 50       19 (isNaN($xrow[$i-1])) and return $this->{n};
323 12         42 $xcopy[$i]= $xrow[$i-1];
324             ## should check for number, not string
325             }
326              
327             ################ now comes the real routine
328              
329 4         8 $this->{syy}+= ($weight*($yelement*$yelement));
330 4         5 $this->{sy}+= ($weight*($yelement));
331 4 50       8 if ($weight>=0.0) { ++$this->{n}; } else { --$this->{n}; }
  4         5  
  0         0  
332              
333 4         5 $this->{wghtn}+= $weight;
334              
335 4         16 for (my $i=1; $i<=$this->{k};++$i) {
336 11 100       19 if ($weight==0.0) { return $this->{n}; }
  2         8  
337 9 50       22 if (abs($xcopy[$i])>(TINY)) {
338 9         12 my $xi=$xcopy[$i];
339              
340 9         11 my $di=$this->{d}->[$i];
341 9         21 my $dprimei=$di+$weight*($xi*$xi);
342 9         12 my $cbar= $di/$dprimei;
343 9         12 my $sbar= $weight*$xi/$dprimei;
344 9         10 $weight*=($cbar);
345 9         13 $this->{d}->[$i]=$dprimei;
346 9         21 my $nextr=int( (($i-1)*( (2.0*$this->{k}-$i))/2.0+1) );
347 9 50       18 if (!($nextr<=$this->{rbarsize}) ) { confess "$MNAME:include: Internal Error 2"; }
  0         0  
348 9         16 my $xk;
349 9         17 for (my $kc=$i+1;$kc<=$this->{k};++$kc) {
350 11         10 $xk=$xcopy[$kc]; $xcopy[$kc]=$xk-$xi*$this->{rbar}->[$nextr];
  11         18  
351 11         19 $this->{rbar}->[$nextr]= $cbar * $this->{rbar}->[$nextr]+$sbar*$xk;
352 11         23 ++$nextr;
353             }
354 9         8 $xk=$yelement; $yelement-= $xi*$this->{thetabar}->[$i];
  9         13  
355 9         29 $this->{thetabar}->[$i]= $cbar*$this->{thetabar}->[$i]+$sbar*$xk;
356             }
357             }
358 2         4 $this->{sse}+=$weight*($yelement*$yelement);
359              
360             # indicate that Theta is garbage now
361 2         3 $this->{theta}= undef;
362 2         12 $this->{sigmasq}= undef; $this->{rsq}= undef; $this->{adjrsq}= undef;
  2         2  
  2         4  
363              
364 2         6 return $this->{n};
365             }
366              
367              
368             ################################################################
369             ###
370             ### $reg->rsq(), $reg->adjrsq(), $reg->sigmasq(), $reg->ybar(),
371             ### $reg->sst(), $reg->k(), $reg->n()
372             ###
373             ### These methods provide common auxiliary information. rsq,
374             ### adjrsq, sigmasq, sst, and ybar have not been checked but are
375             ### likely correct. The results are stored for later usage,
376             ### although this is somewhat unnecessary because the
377             ### computation is so simple anyway.
378             ################################################################
379              
380             sub rsq {
381 1     1 0 7 my $this= shift;
382 1         5 return $this->{rsq}= 1.0- $this->{sse} / $this->sst();
383             }
384              
385             sub adjrsq {
386 0     0 0 0 my $this= shift;
387 0         0 return $this->{adjrsq}= 1.0- (1.0- $this->rsq())*($this->{n}-1)/($this->{n} - $this->{k});
388             }
389              
390             sub sigmasq {
391 0     0 0 0 my $this= shift;
392 0 0       0 return $this->{sigmasq}= ($this->{n}<=$this->{k}) ? "Inf" : ($this->{sse}/($this->{n} - $this->{k}));
393             }
394              
395             sub ybar {
396 1     1 0 2 my $this= shift;
397 1         22 return $this->{ybar}= $this->{sy}/$this->{wghtn};
398             }
399              
400             sub sst {
401 1     1 0 2 my $this= shift;
402 1         5 return $this->{sst}= ($this->{syy} - $this->{wghtn}*($this->ybar())**2);
403             }
404              
405             sub k {
406 0     0 0 0 my $this= shift;
407 0         0 return $this->{k};
408             }
409             sub n {
410 0     0 0 0 my $this= shift;
411 0         0 return $this->{n};
412             }
413              
414              
415              
416             ################################################################
417             ### $reg->print() [no arguments!]
418             ###
419             ### prints the estimated coefficients, and R^2 and N. For an
420             ### example see the Synopsis.
421             ################################################################
422             sub print {
423 0     0 0 0 my $this= shift;
424              
425 0         0 print "****************************************************************\n";
426 0         0 print "Regression '$this->{regname}'\n";
427 0         0 print "****************************************************************\n";
428              
429 0         0 my $theta= $this->theta();
430 0         0 my @standarderrors= $this->standarderrors();
431              
432 0         0 printf "%-15s\t%12s\t%12s\t%7s\n", "Name", "Theta", "StdErr", "T-stat";
433 0         0 for (my $i=0; $i< $this->k(); ++$i) {
434 0 0       0 my $name= "[$i".(defined($this->{xnames}->[$i]) ? "='$this->{xnames}->[$i]'":"")."]";
435 0         0 printf "%-15s\t", $name;
436 0         0 printf "%12.4f\t", $theta->[$i];
437 0         0 printf "%12.4f\t", $standarderrors[$i];
438 0         0 printf "%7.2f", ($theta->[$i]/$standarderrors[$i]);
439 0         0 printf "\n";
440             }
441              
442 0         0 print "\nR^2= ".sprintf("%.3f", $this->rsq()).", N= ".$this->n().", K= ".$this->k()."\n";
443 0         0 print "****************************************************************\n";
444             }
445              
446              
447              
448             ################################################################
449             ### $theta = $reg->theta or @theta = $reg->theta
450             ###
451             ### This is the work horse. It estimates and returns the vector
452             ### of coefficients. In scalar context returns an array
453             ### reference; in list context it returns the list of
454             ### coefficients.
455             ################################################################
456             sub theta {
457 1     1 0 8 my $this= shift;
458              
459 1 50       4 if (defined($this->{theta})) {
460 0 0       0 return wantarray ? @{$this->{theta}} : $this->{theta};
  0         0  
461             }
462              
463 1 50       5 if ($this->{n} < $this->{k}) { return; }
  0         0  
464 1         6 for (my $i=($this->{k}); $i>=1; --$i) {
465 3         9 $this->{theta}->[$i]= $this->{thetabar}->[$i];
466 3         8 my $nextr= int (($i-1)*((2.0*$this->{k}-$i))/2.0+1);
467 3 50       9 if (!($nextr<=$this->{rbarsize})) { confess "$MNAME:theta: Internal Error 3"; }
  0         0  
468 3         25 for (my $kc=$i+1;$kc<=$this->{k};++$kc) {
469 3         8 $this->{theta}->[$i]-=($this->{rbar}->[$nextr]*$this->{theta}->[$kc]);
470 3         12 ++$nextr;
471             }
472             }
473              
474              
475 1         3 my $ref = $this->{theta}; shift(@$ref); # we are counting from 0
  1         2  
476              
477             # if in a scalar context, otherwise please return the array directly
478 1 50       5 wantarray ? @{$this->{theta}} : $this->{theta};
  1         6  
479             }
480              
481             ################################################################
482             ### @se= $reg->standarderrors()
483             ###
484             ### This is the most difficult routine. Take it on faith.
485             ###
486             ### R=Rbar is an upperright triangular matrix, kept in normalized
487             ### form with implicit 1's on the diagonal. D is a diagonal scaling
488             ### matrix. These correspond to "standard Regression usage" as
489             ###
490             ### X' X = R' D R
491             ###
492             ### A backsubstitution routine (in thetacov) allows to invert the R
493             ### matrix (the inverse is upper-right triangular, too!). Call this
494             ### matrix H, that is H=R^(-1).
495             ###
496             ### (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1)
497             ### = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]'
498             ###
499             ### Let's work this for our example, where
500             ###
501             ### $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
502             ### $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
503             ### $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
504             ### $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
505             ###
506             ### For debugging, the X'X matrix for our example is
507             ### 4, 50, 3
508             ### 50 1116 29
509             ### 3 29 9
510             ###
511             ### Its inverse is
512             ### 0.70967 -0.027992 -0.146360
513             ### -0.02799 0.002082 0.002622
514             ### -0.14636 0.002622 0.151450
515             ###
516             ### Internally, this is kept as follows
517             ###
518             ### R is 1, 0, 0
519             ### 12.5 1 0
520             ### 0.75 -0.0173 1
521             ###
522             ### d is the diagonal(4,491,6.603) matrix, which as 1/sqrt becomes dhi= 0.5, 0.04513, 0.3892
523             ###
524             ### R * d * R' is indeed the X' X matrix.
525             ###
526             ### The inverse of R is
527             ###
528             ### 1, 0, 0
529             ### -12.5 1 0
530             ### -0.9664 0.01731 1
531             ###
532             ### in R, t(solve(R) %*% dhi) %*% t( t(solve(R) %*% dhi) ) is the correct inverse.
533             ###
534             ### The routine has a debug switch which makes it come out very verbose.
535             ################################################################
536             my $debug=0;
537              
538             sub standarderrors {
539 0     0 0   my $this= shift;
540 0           our $K= $this->{k}; # convenience
541              
542 0           our @u;
543             sub ui {
544 0 0   0 0   if ($debug) {
545 0 0 0       ($_[0]<1)||($_[0]>$K) and confess "$MNAME:standarderrors: bad index 0 $_[0]\n";
546 0 0 0       ($_[1]<1)||($_[1]>$K) and confess "$MNAME:standarderrors: bad index 1 $_[0]\n";
547             }
548 0           return (($K*($_[0]-1))+($_[1]-1));
549             }
550             sub giveuclear {
551 0     0 0   for (my $i=0; $i<($K**2); ++$i) { $u[$i]=0.0; }
  0            
552 0 0         return (wantarray) ? @u : \@u;
553             }
554              
555 0     0 0   sub u { return $u[ui($_[0], $_[1])]; }
556 0     0 0   sub setu { return $u[ui($_[0], $_[1])]= $_[2]; }
557 0     0 0   sub add2u { return $u[ui($_[0], $_[1])]+= $_[2]; }
558 0     0 0   sub mult2u { return $u[ui($_[0], $_[1])]*= $_[2]; }
559              
560 0 0         (defined($K)) or confess "$MNAME:standarderrors: Internal Error: I forgot the number of variables.\n";
561 0 0         if ($debug) {
562 0           print "The Start Matrix is:\n";
563 0           for (my $i=1; $i<=$K; ++$i) {
564 0           print "[$i]:\t";
565 0           for (my $j=1; $j<=$K; ++$j) {
566 0           print $this->rbr($i, $j)."\t";
567             }
568 0           print "\n";
569             }
570 0           print "The Start d vector is:\n";
571 0           for (my $i=1; $i<=$K; ++$i) {
572 0           print "".$this->{d}[$i]."\t";
573             }
574 0           print "\n";
575             }
576              
577             sub rbrindex {
578 0 0   0 0   return ($_[0] == $_[1]) ? -9 :
    0          
579             ($_[0]>$_[1]) ? -8 :
580             ((($_[0]-1.0)* (2.0*$K-$_[0])/2.0+1.0) + $_[1] - 1 - $_[0] ); }
581              
582             # now a real member routine;
583             sub rbr {
584 0     0 0   my $this= shift;
585 0 0         return ($_[0] == $_[1]) ? 1 : ( ($_[0]>$_[1]) ? 0 : ($this->{rbar}[rbrindex($_[0],$_[1])]));
    0          
586             }
587              
588 0           my $u= giveuclear();
589              
590 0           for (my $j=$K; $j>=1; --$j) {
591 0           setu($j,$j, 1.0/($this->rbr($j,$j)));
592 0           for (my $k=$j-1; $k>=1; --$k) {
593 0           setu($k,$j,0);
594 0           for (my $i=$k+1; $i<=$j; ++$i) { add2u($k,$j, $this->rbr($k,$i)*u($i,$j)); }
  0            
595 0           mult2u($k,$j, (-1.0)/$this->rbr($k,$k));
596             }
597             }
598              
599 0 0         if ($debug) {
600 0           print "The Inverse Matrix of R is:\n";
601 0           for (my $i=1; $i<=$K; ++$i) {
602 0           print "[$i]:\t";
603 0           for (my $j=1; $j<=$K; ++$j) {
604 0           print $u[ui($i,$j)]."\t";
605             }
606 0           print "\n";
607             }
608             }
609              
610 0           for (my $i=1;$i<=$K;++$i) {
611 0           for (my $j=1;$j<=$K;++$j) {
612 0 0         if (abs($this->{d}[$j])<TINY) {
613 0           mult2u($i,$j, sqrt(1.0/TINY));
614 0 0         if (abs($this->{d}[$j])==0.0) {
615 0 0         if ($this->{neverabort}) {
616 0           for (my $i=0; $i<($K**2); ++$i) { $u[$i]= "NaN"; }
  0            
617 0           return undef;
618             }
619 0           else { confess "$MNAME:standarderrors: I cannot compute the theta-covariance matrix for variable $j ".($this->{d}[$j])."\n"; }
620             }
621             }
622 0           else { mult2u($i,$j, sqrt(1.0/$this->{d}[$j])); }
623             }
624             }
625              
626 0 0         if ($debug) {
627 0           print "The Inverse Matrix of R multipled by D^(-1/2) is:\n";
628 0           for (my $i=1; $i<=$K; ++$i) {
629 0           print "[$i]:\t";
630 0           for (my $j=1; $j<=$K; ++$j) {
631 0           print $u[ui($i,$j)]."\t";
632             }
633 0           print "\n";
634             }
635             }
636              
637 0 0         $this->{sigmasq}= ($this->{n}<=$K) ? "Inf" : ($this->{sse}/($this->{n} - $K));
638 0           my @xpxinv;
639 0           for (my $i=1;$i<=$K; ++$i) {
640 0           for (my $j=$i;$j<=$K;++$j) {
641 0           my $indexij= ui($i,$j);
642 0           $xpxinv[$indexij]= 0.0;
643 0           for (my $k=1;$k<=$K;++$k) {
644 0           $xpxinv[$indexij] += $u[ui($i,$k)]*$u[ui($j,$k)];
645             }
646 0           $xpxinv[ui($j,$i)]= $xpxinv[$indexij]; # this is symmetric
647             }
648             }
649              
650 0 0         if ($debug) {
651 0           print "The full inverse matrix of X'X is:\n";
652 0           for (my $i=1; $i<=$K; ++$i) {
653 0           print "[$i]:\t";
654 0           for (my $j=1; $j<=$K; ++$j) {
655 0           print $xpxinv[ui($i,$j)]."\t";
656             }
657 0           print "\n";
658             }
659 0           print "The sigma^2 is ".$this->{sigmasq}."\n";
660             }
661              
662             ## 99% of the usage here will be to print the diagonal elements of sqrt ( (X' X) sigma^2 )
663             ## so, let's make this our first returned object;
664              
665 0           my @secoefs;
666 0           for (my $i=1; $i<=$K; ++$i) {
667 0           $secoefs[$i-1]= sqrt($xpxinv[ui($i,$i)] * $this->{sigmasq});
668             }
669 0 0         if ($debug) { for (my $i=0; $i<$K; ++$i) { print " $secoefs[$i] "; } print "\n"; }
  0            
  0            
  0            
670              
671             # the following are clever return methods; if the user goes over the secoefs,
672             # almost surely an error will result, because he will run into xpxinv. For special
673             # usage, however, xpxinv may still be useful.
674              
675 0           return ( @secoefs, \@xpxinv, $this->sigmasq );
676             }
677              
678              
679             ################################
680             sub linearcombination_variance {
681 0     0 0   my $this= shift;
682 0           our $K= $this->{k}; # convenience
683              
684 0           my @linear= @_;
685              
686 0 0         ($#linear+1 == $K) or confess "$MNAME:linearcombination_variance: ".
687             "Sorry, you must give a vector of length $K, not ".($#linear+1)."\n";
688              
689 0           my @allback= $this->standarderrors(); # compute everything we need;
690              
691 0           my $xpxinv= $allback[$this->{k}];
692 0           my $sigmasq= $allback[$this->{k}+1];
693              
694 0           my $sum=0;
695 0           for (my $i=1; $i<=$K; ++$i) {
696 0           for (my $j=1; $j<=$K; ++$j) {
697 0           $sum+= $linear[$i-1]*$linear[$j-1]*$xpxinv->[ui($i,$j)];
698             }
699             }
700 0           $sum*=$sigmasq;
701 0           return $sum;
702             }
703              
704              
705             ################################################################
706             ### sub dump() was used internally for debugging.
707             ################################################################
708             sub dump {
709 0     0 0   my $this= $_[0];
710 0           print "****************************************************************\n";
711 0           print "Regression '$this->{regname}'\n";
712 0           print "****************************************************************\n";
713             sub print1val {
714 1     1   11 no strict;
  1         2  
  1         559  
715 0 0   0 0   print "$_[1]($_[2])=\t". ((defined($_[0]->{ $_[2] }) ? $_[0]->{ $_[2] } : "intentionally undef"));
716              
717 0           my $ref=$_[0]->{ $_[2] };
718              
719 0 0         if (ref($ref) eq 'ARRAY') {
    0          
720 0           my $arrayref= $ref;
721 0           print " $#$arrayref+1 elements:\n";
722 0 0         if ($#$arrayref>30) {
723 0           print "\t";
724 0           for(my $i=0; $i<$#$arrayref+1; ++$i) { print "$i='$arrayref->[$i]';"; }
  0            
725 0           print "\n";
726             }
727             else {
728 0           for(my $i=0; $i<$#$arrayref+1; ++$i) { print "\t$i=\t'$arrayref->[$i]'\n"; }
  0            
729             }
730             }
731             elsif (ref($ref) eq 'HASH') {
732 0           my $hashref= $ref;
733 0           print " ".scalar(keys(%$hashref))." elements\n";
734 0           while (my ($key, $val) = each(%$hashref)) {
735 0           print "\t'$key'=>'$val';\n";
736             }
737             }
738             else {
739 0           print " [was scalar]\n"; }
740             }
741              
742 0           while (my ($key, $val) = each(%$this)) {
743 0           $this->print1val($key, $key);
744             }
745 0           print "****************************************************************\n";
746             }
747              
748             ################################################################
749             ### The Test Program. Invoke as "perl Regression.pm".
750             ################################################################
751              
752              
753             if ($0 eq "Regression.pm") {
754              
755             # Create regression object
756             my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] );
757              
758             # Add data points
759             $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] );
760             $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] );
761             $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] );
762              
763             my %inhash= ( const => 1.0, someX => 11.0, someY => 2.0, ignored => "ignored" );
764             $reg->include( 15.0, \%inhash );
765             # $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] );
766              
767             # Print the result
768             $reg->print();
769             }
770              
771              
772             1;