File Coverage

blib/lib/Statistics/Sequences/Turns.pm

Criterion	Covered	Total	%
statement	16	18	88.8
branch			n/a
condition			n/a
subroutine	6	6	100.0
pod			n/a
total	22	24	91.6

line	stmt	sub	time	code
1				package Statistics::Sequences::Turns;
2
3	1	1	24440	use 5.008008;
	1		5
	1		44
4	1	1	6	use strict;
	1		2
	1		39
5	1	1	6	use warnings;
	1		7
	1		31
6	1	1	5	use Carp 'croak';
	1		2
	1		90
7	1	1	7	use vars qw($VERSION @ISA);
	1		3
	1		61
8	1	1	511	use Statistics::Sequences 0.11;
	0
	0
9				@ISA = qw(Statistics::Sequences);
10				$VERSION = 0.11;
11				use Statistics::Zed 0.072;
12				our $zed = Statistics::Zed->new();
13
14				=pod
15
16				=head1 NAME
17
18				Statistics::Sequences::Turns - Kendall's test for turning-points - peaks or troughs - in a numerical sequence
19
20				=head1 SYNOPSIS
21
22				use strict;
23				use Statistics::Sequences::Turns 0.11;
24				my $turns = Statistics::Sequences::Turns->new();
25				$turns->load([qw/2 0 8 5 3 5 2 3 1 1 9 4 4 1 5 5 6 5 8 7 5 3 8 5 6/]); # strings/numbers; or send as "data => $aref" with each stat call
26				my $val = $turns->observed(state => 5); # other methods include: expected(), variance(), obsdev() and stdev()
27				$val = $turns->zscore(state => 5, tails => 2, ccorr => 1); # # or want an array & get back both z- and p-value
28				$val = $turns->p_value(state => 5, tails => 1); # assuming data are loaded; alias: test()
29				my $href = $turns->stats_hash(values => {observed => 1, p_value => 1}, ccorr => 1); # include any other stat-method as needed
30				$turns->dump(values => {observed => 1, expected => 1, p_value => 1}, ccorr => 1, flag => 1, precision_s => 3, precision_p => 7);
31				# prints: observed = 11.000, expected = 10.900, p_value = 0.5700167
32
33				=head1 DESCRIPTION
34
35				For data of the continuous numerical type, a count of turns is incremented if the value on trial I<i>, for I<i> is greater than zero and less than I<n>, is, with respect to its neighbours, a peak (greater than both neighbours) or a trough (less than both neighbours). Comparing this count with the expected number of turns, and the expected variance of this count, for a randomly generated sequence completes the test.
36
37				=head1 METHODS
38
39				=head2 new
40
41				$turns = Statistics::Sequences::Turns->new();
42
43				Returns a new Turns object. Expects/accepts no arguments but the classname.
44
45				=head2 load
46
47				$turns->load(@data);
48				$turns->load(\@data);
49				$turns->load('sample1' => \@data); # labelled whatever
50
51				Loads data anonymously or by name - see L<load\|Statistics::Data/load, load_data> in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here). Data must be numerical (ordinal, interval type). All elements must be numerical of the method croaks.
52
53				=cut
54
55				sub load {
56				my $self = shift;
57				$self->SUPER::load(@_);
58				my $data = $self->read(index => -1);
59				croak __PACKAGE__, '::load All data must be numerical for turns statistics' if ! $self->all_numeric($data);
60				return 1;
61				}
62
63				=head2 add, read, unload
64
65				See L<Statistics::Data> for these additional operations on data that have been loaded.
66
67				=head2 observed, turncount_observed, tco
68
69				$v = $turns->observed(); # use anonymously loaded data
70				$v = $turns->observed(index => 1); # ... or give the required "index" for the loaded data
71				$v = $turns->observed(label => 'mysequence'); # ... or its "label" value
72				$v = $turns->observed(data => \@data); # ... or just give the data now
73
74				Returns observed number of turns. This is the number of peaks and troughs, starting the count from index 1 of a flat array, checking if both its left/right (or past/future) neighbours are lesser than it (a peak) or greater than it (a trough). Wherever the values in successive indices of the list are equal, they are treated as a single observation/datum - so the following:
75
76				0 0 1 1 0 1 1 1 0 1
77
78				is counted up for turns as
79
80				0 1 0 1 0 1
81				* * * *
82
83				So there are four turns in this example - two peaks (0 1 0) and two troughs (1 0 1). (If repeated, this sequence would significantly deviate from expectation, I<p> = .035.)
84
85				=cut
86
87				sub observed {
88				my $self = shift;
89				my $args = ref $_[0] ? shift : {@_};
90				my $data = _set_data($self, $args);
91				my $num = scalar(@{$data});
92				return 0 if ! $num or $num < 3;
93				my ($count, $i) = (0);
94
95				for ($i = 1; $i < $num - 1; $i++) {
96				if ( ($data->[$i - 1] > $data->[$i]) && ($data->[$i + 1] > $data->[$i]) ) { # trough at $i
97				$count++;
98				}
99				elsif ( ($data->[$i - 1] < $data->[$i]) && ($data->[$i + 1] < $data->[$i]) ) { # peak at $i
100				$count++;
101				}
102				}
103				return $count;
104				}
105				*turncount_observed = \&observed;
106				*tco = \&observed;
107
108				=head2 expected, turncount_expected, tce
109
110				$v = $turns->expected(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
111				$v = $turns->expected(data => \@data); # use these data
112				$v = $turns->expected(trials => 10); # don't use actual data; calculate from this number of trials
113
114				Returns the expected number of turns, which is set by I<N> the number of trials/observations/sample-size ...:
115
116				=for html <p>  <i>E[T]</i> = 2 / 3 (<i>N</i> – 2)
117
118				=cut
119
120				sub expected {
121				my $self = shift;
122				my $args = ref $_[0] ? shift : {@_};
123				my $num = defined $args->{'trials'} ? $args->{'trials'} : scalar(@{_set_data($self, $args)});
124				return 2/3 * ($num - 2);
125				}
126				*tce = \&expected;
127				*turncount_expected = \&expected;
128
129				=head2 variance, turncount_variance, tcv
130
131				$v = $turns->variance(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
132				$v = $turns->variance(data => \@data); # use these data
133				$v = $turns->variance(trials => number); # don't use actual data; calculate from this number of trials
134
135				Returns the expected variance in the number of turns for the given length of data I<N>.
136
137				=for html <p>  <i>V[T]</i> = (16<i>N</i> – 29 ) / 90
138
139				=cut
140
141				sub variance {
142				my $self = shift;
143				my $args = ref $_[0] ? shift : {@_};
144				my $num = defined $args->{'trials'} ? $args->{'trials'} : scalar(@{_set_data($self, $args)});
145				return (16 * $num - 29) / 90;
146				}
147				*tcv = \&variance;
148				*turncount_variance = \&variance;
149
150				=head2 obsdev, observed_deviation
151
152				$v = $turns->obsdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
153				$v = $turns->obsdev(data => \@data); # use these data
154
155				Returns the deviation of (difference between) observed and expected turn-count for the loaded/given sequence (I<O> - I<E>).
156
157				=cut
158
159				sub obsdev {
160				return observed(@_) - expected(@_);
161				}
162				*observed_deviation = \&obsdev;
163
164				=head2 stdev, standard_deviation
165
166				$v = $turns->stdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
167				$v = $turns->stdev(data => \@data);
168
169				Returns square-root of the variance.
170
171				=cut
172
173				sub stdev {
174				return sqrt(variance(@_));
175				}
176				*standard_deviation = \&stdev;
177
178				=head2 z_value, zscore, turncount_zscore, tzs
179
180				$v = $turns->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
181				$v = $turns->z_value(data => $aref, ccorr => 1);
182				($zvalue, $pvalue) = $turns->z_value(data => $aref, ccorr => 1, tails => 2); # same but wanting an array, get the p-value too
183
184				Returns the zscore from a test of turncount deviation, taking the turncount expected away from that observed and dividing by the root expected turncount variance, by default with a continuity correction in the numerator. Called wanting an array, returns the z-value with its p-value for the tails (1 or 2) given.
185
186				=cut
187
188				sub zscore {
189				my $self = shift;
190				my $args = ref $_[0] ? shift : {@_};
191				my $data = _set_data($self, $args);
192				my $num = scalar(@$data);
193				my ($zval, $pval) = $zed->zscore(
194				observed => defined $args->{'observed'} ? $args->{'observed'} : $self->tco($args),
195				expected => $self->tce(trials => $num),
196				variance => $self->tcv(trials => $num),
197				ccorr => defined $args->{'ccorr'} ? $args->{'ccorr'} : 1,
198				tails => $args->{'tails'} \|\| 2,
199				precision_s => $args->{'precision_s'},
200				precision_p => $args->{'precision_p'},
201				);
202				return wantarray ? ($zval, $pval) : $zval;
203				}
204				*tzs = \&zscore;
205				*turncount_zscore = \&zscore;
206				*z_value = \&zscore;
207
208				=head2 p_value, test, turns_test, tnt
209
210				$p = $turns->p_value(); # using loaded data and default args
211				$p = $turns->p_value(ccorr => 0\|1, tails => 1\|2); # normal-approximation based on loaded data
212				$p = $turns->p_value(data => $aref, ccorr => 1, tails => 2); # using given data (by-passing load and read)
213
214				Test the currently loaded data for significance of the number of turning-points by normal approximation. Note: for turns there is "a fairly rapid tendency of the distribution to normality" (Kendall 1973, p. 24).
215
216				=cut
217
218				sub p_value {
219				return (z_value(@_))[1];
220				}
221				*test = \&p_value;
222				*turns_test = \&p_value;
223				*tnt = \&p_value;
224
225				=head2 dump
226
227				$turns->dump(flag => '1\|0', text => '0\|1\|2');
228
229				Print test results to STDOUT. See L<dump\|Statistics::Sequences/dump> in the Statistics::Sequences manpage for details.
230
231				=cut
232
233				sub dump {
234				my $self = shift;
235				my $args = ref $_[0] ? $_[0] : {@_};
236				$args->{'stat'} = 'turns';
237				$self->SUPER::dump($args);
238				return $self;
239				}
240
241				sub _set_data {# Remove equivalent successors: e.g., strip 2nd 2 from (3, 2, 2, 7, 2):
242				my $self = shift;
243				my $args = ref $_[0] ? $_[0] : {@_};
244				my $data = $self->read($args); # have been already checked to be numerical if previously load()'ed
245				ref $data or croak __PACKAGE__, '::Data for counting up turns are needed';
246				my ($i, @data_u) = ();
247				for ($i = 0; $i < scalar(@{$data}); $i++) {
248				push @data_u, $data->[$i] if !scalar(@data_u) or $data->[$i] != $data_u[-1];
249				}
250				return \@data_u;
251				}
252
253				__END__
254
255				=head1 REFERENCES
256
257				Kendall, M. G. (1973). I<Time-series>. London, UK: Griffin. [The test is described on pages 22-24; in the Example 2.1 for this test, the expected number of turns should be calculated with the value 52 (i.e., I<n> - 2), not 54.]
258
259				=head1 SEE ALSO
260
261				L<Statistics::Sequences\|Statistics::Sequences> for other tests of sequences, and for sharing data between these tests.
262
263				=head1 TO DO/BUGS
264
265				Implementation of the serial test for non-overlapping I<v>-nomes.
266
267				=head1 REVISION HISTORY
268
269				See CHANGES in installation dist for revisions.
270
271				=head1 AUTHOR/LICENSE
272
273				=over 4
274
275				=item Copyright (c) 2006-2013 Roderick Garton
276
277				rgarton AT cpan DOT org
278
279				This program is free software. It may be used, redistributed and/or modified under the same terms as Perl-5.6.1 (or later) (see L<http://www.perl.com/perl/misc/Artistic.html>).
280
281				=back
282
283				=head1 DISCLAIMER
284
285				To the maximum extent permitted by applicable law, the author of this module disclaims all warranties, either express or implied, including but not limited to implied warranties of merchantability and fitness for a particular purpose, with regard to the software and the accompanying documentation.
286
287				=cut