File Coverage

blib/lib/Statistics/Autocorrelation.pm

Criterion	Covered	Total	%
statement	44	81	54.3
branch	5	16	31.2
condition	4	12	33.3
subroutine	8	10	80.0
pod	2	2	100.0
total	63	121	52.0

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::Autocorrelation;
2
3	2			2		55339	use warnings;
	2					5
	2					67
4	2			2		11	use strict;
	2					5
	2					74
5	2			2		11	use Carp 'croak';
	2					10
	2					145
6	2			2		1770	use Statistics::Lite 'mean';
	2					3144
	2					2192
7
8							=head1 NAME
9
10							Statistics-Autocorrelation - Coefficients for any lag
11
12							=head1 VERSION
13
14							Version 0.02
15
16							=cut
17
18							our $VERSION = '0.02';
19
20							=head1 SYNOPSIS
21
22							use Statistics::Autocorrelation;
23							$acorr = Statistics::Autocorrelation->new();
24							$coeff = $acorr->coefficient(data => \@data, lag => integer (from 1 to N-1), exact => 0, simple => 1);
25
26							=head1 DESCRIPTION
27
28							Calculates autocorrelation coefficients for a single series of numerical data, for any valid length of I.
29
30							=head1 SUBROUTINES/METHODS
31
32							=head2 new
33
34							$acorr = Statistics::Autocorrelation->new();
35
36							Return a new class object for accessing its methods.
37
38							=cut
39
40							sub new {
41	1			1	1	12	my $class = shift;
42	1					3	my $self = {};
43	1					3	bless $self, $class;
44							#$self->init(@_);
45	1					3	return $self;
46							}
47
48							=head2 coefficient
49
50							$autocorr->coefficient(data => \@data, lag => integer (from 1 to N-1), exact => 0\|1, simple => 1\|0);
51
52							I: C
53
54							Corporate stats programs (e.g., SPSS/PASW), and examples of autocorrelation on the web (e.g., L), often yield/demonstrate a value of the autocorrelation coefficient that assumes that the underlying series is stationary (has no linear or curvilinear trend, no periodicity), and so can be calculated as the population autocorrelation coefficient, the ratio of the autocovariance to the overall variance. To be a valid estimate of the sample autocorrelation coefficient, it is assumed that the number of observations, I, in the sample is "reasonably large" - so that all the observations in each summation in the numerator are taken relative to the mean of the whole series, rather than the exact value at lag I, and also that the variance used in the denominator can be that of the whole series - instead of using completely pairwise products, i.e., making the coefficient dependent on the values of I, ..., I, and I, ..., I at each summation. Additionally, these sources also commonly drop the factor I/(I - 1), assuming that the value is close enough to 1 for large I.
55
56							By default, then, this method returns an estimate of the population autocorrelation coefficient, and the divisor factors are dropped; i.e., the default values of the options C = 0, and C = 1. The default value returned from this method, then, is equivalent to those returned from such corporate software, and demonstrated via such URLs, as cited; and this is also the default form of calculating the coefficient as used in texts such as Chatfield (1975). If you want, however, to keep these divisors, you need to specify C = 0; and if you want, furthermore, the exact sample autocorrelation coefficient, then specify C = 1; then you get the coefficient as calculated by Kendall's (1973) Eq. 3.35.
57
58							A croak will be heard if no value is given for C (expects an array reference), or if the array is empty. A value for C is the only one for which there is no default value and must be given.
59
60							If a value is not given for C, or it equals zero, it is set to the value of 1 by default.
61
62							=cut
63
64							sub coefficient {
65	2			2	1	1339	my $self = shift;
66	2					8	my (%args) = @_;
67	2	50				9	my $dat = ref $args{'data'} ? $args{'data'} : croak 'No value for data for calculating coefficient';
68	2		33			3	my $n = scalar(@{$dat}) \|\| croak 'No data are available for calculating coefficient';
69	2	50				6	croak 'Can\'t autocorrelate a dataset of only one element' if $n < 2;
70	2		50			16	my $lag = $args{'lag'} \|\| 1;
71							#croak "Can\'t autocorrelate with a lag of < $lag > for only < $n > data" if $n - $lag == 1 && !$args{'circular'};
72	2		50			10	$args{'exact'} \|\|= 0;
73
74	2					2	my $rk;
75	2	50				6	if ($args{'exact'} == 1) {
76	0	0				0	if ($args{'circular'}) {
77	0					0	$rk = _calc_exact_circ($dat, $n, $lag);
78							}
79							else {
80	0					0	$rk = _calc_exact($dat, $n, $lag);
81							}
82							}
83							else {
84	2	50				5	if ($args{'circular'}) {
85	0					0	$rk = _calc_approx_circ($dat, $n, $lag, $args{'simple'});
86							}
87							else {
88	2					12	$rk = _calc_approx($dat, $n, $lag, $args{'simple'});
89							}
90							}
91	2					106	return $rk;
92							}
93							*coeff = \&coefficient;
94							# Kendall's (1973) Eq. 3.35., p. 40
95
96							sub _calc_exact {
97	0			0		0	my ($dat, $n, $k) = @_;
98
99	0					0	my $c0 = 1 / ($n - $k);
100	0					0	my ($sum_ui, $sum_uik, $sum_prodsum, $sum_sq_ui, $sum_sq_uik, $i) = (0, 0, 0, 0);
101
102	0					0	for ($i = 0; $i < $n - $k; $i++) {
103	0					0	$sum_ui += $dat->[$i];
104							}
105	0					0	for ($i = 0; $i < $n - $k; $i++) {
106	0					0	$sum_uik += $dat->[$i + $k];
107							}
108
109	0					0	my $c1 = $c0 * $sum_ui;
110	0					0	my $c2 = $c0 * $sum_uik;
111
112							# Numerator:
113	0					0	for ($i = 0; $i < $n - $k; $i++) {
114	0					0	$sum_prodsum += ($dat->[$i] - $c1) * ($dat->[$i + $k] - $c2);
115							}
116	0					0	my $numerator = $c0 * $sum_prodsum;
117
118							# Denominator:
119	0					0	for ($i = 0; $i < $n - $k; $i++) {
120	0					0	$sum_sq_ui += ($dat->[$i] - $c1)**2;
121							}
122	0					0	for ($i = 0; $i < $n - $k; $i++) {
123	0					0	$sum_sq_uik += ($dat->[$i + $k] - $c2)**2;
124							}
125
126	0					0	my $denominator = sqrt($c0 * $sum_sq_ui * $c0 * $sum_sq_uik);
127
128	0					0	my $rk = $numerator / $denominator;
129
130	0					0	return $rk;
131
132							}
133
134							# Kendall Eq. 3.36
135
136							sub _calc_approx {
137	2			2		4	my ($dat, $n, $k) = @_;
138	2					3	my $simple = shift;
139	2					1	my $mean = mean(@{$dat});
	2					11
140	2					32	my ($rk, $sum_resid, $sum_sq, $i) = ();
141
142	2					27	for ($i = 0; $i < $n - $k; $i++) {
143	2					10	$sum_resid += ($dat->[$i] - $mean)*($dat->[$i + $k] - $mean);
144							}
145
146	2					7	$sum_sq = _sum_sq($dat, $n, $mean);
147
148	2	50	33			19	if (defined $simple && !$simple) {
149	0					0	$rk = ($sum_resid / ($n - $k)) / ($sum_sq/$n);
150							}
151							else {
152	2					4	$rk = $sum_resid / $sum_sq;
153							}
154
155	2					6	return $rk;
156							}
157
158							sub _calc_approx_circ {
159	0			0		0	my ($dat, $n, $k) = @_;
160	0		0			0	my $simple = shift \|\| 0;
161	0					0	my $mean = mean(@{$dat});
	0					0
162	0					0	my ($rk, $sum_resid, $sum_sq, $i) = ();
163
164	0					0	for ($i = 0; $i < $n; $i++) { # go right up to the last value in the data
165	0	0				0	my $i2 = $i + $k >= $n ? $k : $i + $k;
166	0					0	$sum_resid += ($dat->[$i] - $mean)*($dat->[$i2] - $mean);
167							}
168
169	0					0	$sum_sq = _sum_sq($dat, $n, $mean);
170
171	0	0				0	if (!$simple) {
172	0					0	$rk = ($sum_resid / ($n - $k)) / ($sum_sq / $n);
173							}
174							else {
175	0					0	$rk = $sum_resid / $sum_sq;
176							}
177
178	0					0	return $rk;
179							}
180
181							sub _sum_sq {
182	2			2		3	my ($dat, $n, $mean) = @_;
183	2					3	my ($i, $sum) = ();
184	2					7	for ($i = 0; $i < $n; $i++) {
185	4					21	$sum += ($dat->[$i] - $mean)**2;
186							}
187	2					4	return $sum;
188							}
189
190							=head1 REFERENCES
191
192							Chatfield, C. (1975). I. London, UK: Chapman and Hall.
193
194							Kendall, M. G. (1973). I. London, UK: Griffin.
195
196							=head1 AUTHOR
197
198							Roderick Garton, C<< >>
199
200							=head1 BUGS
201
202							Please report any bugs or feature requests to C, or through
203							the web interface at L. I will be notified, and then you'll
204							automatically be notified of progress on your bug as I make changes.
205
206							=head1 SUPPORT
207
208							You can find documentation for this module with the perldoc command.
209
210							perldoc Statistics::Autocorrelation
211
212
213							You can also look for information at:
214
215							=over 4
216
217							=item * RT: CPAN's request tracker
218
219							L
220
221							=item * AnnoCPAN: Annotated CPAN documentation
222
223							L
224
225							=item * CPAN Ratings
226
227							L
228
229							=item * Search CPAN
230
231							L
232
233							=back
234
235							=head1 LICENSE AND COPYRIGHT
236
237							Copyright 2011 Roderick Garton.
238
239							This program is free software; you can redistribute it and/or modify it
240							under the terms of either: the GNU General Public License as published
241							by the Free Software Foundation; or the Artistic License.
242
243							See http://dev.perl.org/licenses/ for more information.
244
245							=cut
246
247							1; # End of Statistics::Autocorrelation