File Coverage

blib/lib/Statistics/Sequences/Turns.pm

Criterion	Covered	Total	%
statement	66	71	92.9
branch	26	36	72.2
condition	11	14	78.5
subroutine	16	17	94.1
pod	9	9	100.0
total	128	147	87.0

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::Sequences::Turns;
2	2			2		26012	use 5.008008;
	2					4
3	2			2		7	use strict;
	2					2
	2					31
4	2			2		10	use warnings;
	2					5
	2					44
5	2			2		5	use Carp 'croak';
	2					3
	2					120
6	2			2		908	use Statistics::Sequences 0.14;
	2					44374
	2					50
7	2			2		10	use base qw(Statistics::Sequences);
	2					3
	2					133
8							$Statistics::Sequences::Turns::VERSION = '0.12';
9	2			2		978	use Statistics::Zed 0.10;
	2					14756
	2					1368
10
11							=pod
12
13							=head1 NAME
14
15							Statistics::Sequences::Turns - Kendall's turning-points test - of peaks and troughs in a numerical sequence
16
17							=head1 SYNOPSIS
18
19							use strict;
20							use Statistics::Sequences::Turns 0.12;
21							my $turns = Statistics::Sequences::Turns->new();
22							$turns->load([2, 0, 8.5, 5, 3, 5.01, 2, 2, 3]); # numbers; or send as "data => $aref" with each stat call
23							my $val = $turns->observed(); # or descriptive methods: expected(), variance(), obsdev() and stdev()
24							$val = $turns->z_value(); # # or in list context get back both z- and p-value
25							$val = $turns->p_value(); # as above, assume data are loaded
26							my $href = $turns->stats_hash(values => {observed => 1, p_value => 1}, ccorr => 1); # incl. any other stat-method
27							$turns->dump(values => {observed => 1, expected => 1, p_value => 1}, ccorr => 1, flag => 1, precision_s => 3, precision_p => 7);
28							# prints: observed = 11.000, expected = 10.900, p_value = 0.5700167
29
30							=head1 DESCRIPTION
31
32							Implements Kendall's (1973) "turning point test" of sudden changes as peaks and troughs in the values of a numerical sequence. It is sometimes described as a test of "cyclicity", and often used as a test of randomness. However, it simply counts up the number of local maxima and minima within a sequence, regardless of their spacing and magnitude, and so does not indicate if the changes actually cycle between highs and lows, if they are more or less balanced in magnitude, or if any cycling is periodic. Kendall introduced this as a rough test of ups and downs in a sequence ahead of describing more sensitive tests based on autocorrelation and Fourier analysis.
33
34							Specifically, for a sequence of numerical data (interval or ordinal) of size I, a count of turns is incremented if the value on trial I, for all I greater than zero and less than I, is, with respect to its immediate neighbours (the values on I - 1 and I + 1), a greater than both neighbours (a peak) or less than both neighbours (a trough). The difference of this observed number from the mean expected number of turns for a randomly generated sequence, taken as a unit of the standard deviation, gives a I-score for assessing the "randomness" of the sequence, i.e., the absence of a factor systematically affecting the frequency of peaks/troughs, given that, for turns, there is "a fairly rapid tendency of the distribution to normality" (Kendall 1973, p. 24).
35
36							=head1 METHODS
37
38							=head2 new
39
40							$turns = Statistics::Sequences::Turns->new();
41
42							Returns a new Turns object. Expects/accepts no arguments but the classname.
43
44							=head2 load
45
46							$turns->load(@data);
47							$turns->load(\@data);
48							$turns->load('sample1' => \@data); # labelled whatever
49
50							Loads data anonymously or by name - see L in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here). Data must be numerical (ordinal, interval type). All elements must be numerical of the method croaks.
51
52							=cut
53
54							sub load {
55	2			2	1	259	my $self = shift;
56	2					8	$self->SUPER::load(@_);
57	2	50				254	croak __PACKAGE__, '::load All data must be numerical for turns statistics'
58							if !$self->all_numeric( $self->access( index => -1 ) );
59	2					643	return 1;
60							}
61
62							=head2 add, read, unload
63
64							See L for these additional operations on data that have been loaded.
65
66							=head2 observed
67
68							$v = $turns->observed(); # use anonymously loaded data
69							$v = $turns->observed(index => 1); # ... or give the required "index" for the loaded data
70							$v = $turns->observed(label => 'mysequence'); # ... or its "label" value
71							$v = $turns->observed(data => \@data); # ... or just give the data now
72
73							Returns observed number of turns. This is the number of peaks and troughs, starting the count from index 1 of the sequence (a flat array), checking if both its immediate left/right (or past/future) neighbours are lesser than it (a peak) or greater than it (a trough). Wherever the values in successive indices in the sequence are equal, they are treated as a single observation/datum - so the following:
74
75							0 0 1 1 0 1 1 1 0 1
76
77							is counted up for turns as
78
79							0 1 0 1 0 1
80							* * * *
81
82							This shows four turns - two peaks (0 1 0) and two troughs (1 0 1).
83
84							Returns 0 if the given list of is empty, or the number of its elements is less than 3.
85
86							=cut
87
88							sub observed {
89	8			8	1	468	my $self = shift;
90	8	100				15	my $args = ref $_[0] ? shift : {@_};
91	8					9	my $data = _set_data( $self, $args );
92	8					6	my $trials = scalar @{$data};
	8					7
93	8	50	33			25	return 0 if not $trials or $trials < 3;
94	8					8	my ($count, $i) = (0);
95	8					13	for ($i = 1; $i < $trials - 1; $i++) {
96	276	100	100			942	if ( ( $data->[ $i - 1 ] > $data->[$i] )
		100	100
97							&& ( $data->[ $i + 1 ] > $data->[$i] ) )
98							{ # trough at $i
99	88					112	$count++;
100							}
101							elsif (( $data->[ $i - 1 ] < $data->[$i] )
102							&& ( $data->[ $i + 1 ] < $data->[$i] ) )
103							{ # peak at $i
104	88					116	$count++;
105							}
106							}
107	8					25	return $count;
108							}
109							*turncount_observed = \&observed;
110							*tco = \&observed;
111
112							=head2 expected
113
114							$v = $turns->expected(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
115							$v = $turns->expected(data => \@data); # use these data
116							$v = $turns->expected(trials => 10); # don't use actual data; calculate from this number of trials
117
118							Returns the expected number of turns, which is set by I the number of trials/observations/sample-size ...:
119
120							=for html E[T] = 2 / 3 (N – 2)
121
122							or, equivalently (in some sources),
123
124							=for html E[T] = ( 2N – 4 ) / 3
125
126							=cut
127
128							sub expected {
129	7			7	1	386	my $self = shift;
130	7	50				14	my $args = ref $_[0] ? shift : {@_};
131							my $trials =
132							defined $args->{'trials'}
133							? $args->{'trials'}
134	7	100				14	: scalar( @{ _set_data( $self, $args ) } );
	3					4
135	7					21	return 2 / 3 * ( $trials - 2 );
136							#return (2 * $trials - 4) / 3;
137							}
138							*tce = \&expected;
139							*turncount_expected = \&expected;
140
141							=head2 variance
142
143							$v = $turns->variance(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
144							$v = $turns->variance(data => \@data); # use these data
145							$v = $turns->variance(trials => number); # don't use actual data; calculate from this number of trials
146
147							Returns the expected variance in the number of turns for the given length of data I.
148
149							=for html V[T] = (16N – 29 ) / 90
150
151							=cut
152
153							sub variance {
154	8			8	1	377	my $self = shift;
155	8	50				16	my $args = ref $_[0] ? shift : {@_};
156							my $trials =
157							defined $args->{'trials'}
158							? $args->{'trials'}
159	8	100				12	: scalar( @{ _set_data( $self, $args ) } );
	4					6
160	8					41	return ( 16 * $trials - 29 ) / 90;
161							}
162							*tcv = \&variance;
163							*turncount_variance = \&variance;
164
165							=head2 obsdev
166
167							$v = $turns->obsdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
168							$v = $turns->obsdev(data => \@data); # use these data
169
170							Returns the observed deviation from expectation for the loaded/given sequence: observed I expected turn-count (I - I). Alias of C is supported.
171
172							=cut
173
174							sub obsdev {
175	1			1	1	183	return observed(@_) - expected(@_);
176							}
177							*observed_deviation = \&obsdev;
178
179							=head2 stdev
180
181							$v = $turns->stdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
182							$v = $turns->stdev(data => \@data);
183
184							Returns square-root of the variance. Aliases C and C (common in other Statistics modules) are supported.
185
186							=cut
187
188							sub stdev {
189	2			2	1	366	return sqrt variance(@_);
190							}
191							*standard_deviation = \&stdev;
192							*stddev = \&stdev;
193
194							=head2 z_value
195
196							$z = $turns->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
197							$z = $turns->z_value(data => $aref, ccorr => 1);
198							($z, $p) = $turns->z_value(data => $aref, ccorr => 1, tails => 2); # same but wanting an array, get the p-value too
199
200							Returns the deviation ratio, or I-score, taking the turncount expected from that observed and dividing by the root variance, by default with a continuity correction in the numerator. Called in list context, returns the I-score with its normal distribution, two-tailed I -value.
201
202							The data to test can already have been Led, or sent directly as an aref keyed as B.
203
204							Optional named arguments B (1 or 2), B (Boolean for the continuity-correction), B (for the statistic, i.e., I-score) and B (for the I -value).
205
206							The method can all be called with "sufficient" data: giving, instead of actual data, the B number of turns, and the number of B, the latter being sufficient to compute the expected number of turns and its variance.
207
208							Alias C is supported.
209
210							=cut
211
212							sub z_value {
213	4			4	1	371	my $self = shift;
214	4	100				9	my $args = ref $_[0] ? shift : {@_};
215	4					5	my $data = _set_data( $self, $args );
216	4	50				8	my $trials = defined $args->{'trials'} ? $args->{'trials'} : scalar @{$data};
	4					4
217	4					12	my $zed = Statistics::Zed->new();
218							my ( $zval, $pval ) = $zed->zscore(
219							observed => defined $args->{'observed'}
220							? $args->{'observed'}
221							: $self->observed($args),
222							expected => $self->expected( trials => $trials ),
223							variance => $self->variance( trials => $trials ),
224							ccorr => defined $args->{'ccorr'} ? $args->{'ccorr'} : 1,
225							tails => $args->{'tails'} \|\| 2,
226							precision_s => $args->{'precision_s'},
227	4	50	50			60	precision_p => $args->{'precision_p'},
		100
228							);
229	4	100				411	return wantarray ? ( $zval, $pval ) : $zval;
230							}
231							*tzs = \&z_value;
232							*turncount_zscore = \&z_value;
233							*zscore = \&z_value;
234
235							=head2 p_value
236
237							$p = $turns->p_value(); # using loaded data and default args
238							$p = $turns->p_value(ccorr => 0\|1, tails => 1\|2); # normal-approximation based on loaded data
239							$p = $turns->p_value(data => $aref, ccorr => 1, tails => 2); # using given data (by-passing load and read)
240
241							Returns the normal distribution I -value for the deviation ratio (I-score) of the observed number of turns, 2-tailed and continuity-correct by default (or set B => 1 and B => 0, respectively). Other arguments are as for L.
242
243							=cut
244
245							sub p_value {
246	1			1	1	208	return ( z_value(@_) )[1];
247							}
248							*test = \&p_value;
249							*turns_test = \&p_value;
250							*tnt = \&p_value;
251
252							=head2 stats_hash
253
254							$href = $turns->stats_hash(values => {observed => 1, expected => 1, variance => 1, z_value => 1, p_value => 1}, ccorr => 1);
255
256							Returns a hashref for the counts and stats as specified in its "values" argument, and with any options for calculating them. See L for details. If calling via a "turns" object, the option "stat => 'turns'" is not needed (unlike when using the parent "sequences" object).
257
258							=head2 dump
259
260							$turns->dump(flag => '1\|0', text => '0\|1\|2');
261
262							Print test results to STDOUT. See L in the Statistics::Sequences manpage for details.
263
264							=cut
265
266							sub dump {
267	0			0	1	0	my $self = shift;
268	0	0				0	my $args = ref $_[0] ? $_[0] : {@_};
269	0					0	$args->{'stat'} = 'turns';
270	0					0	$self->SUPER::dump($args);
271	0					0	return $self;
272							}
273
274							sub _set_data
275							{ # Get data via Statistics::Date
276							# Remove equivalent successors: e.g., strip 2nd 2 from (3, 2, 2, 7, 2):
277	19			19		15	my $self = shift;
278	19	50				27	my $args = ref $_[0] ? $_[0] : {@_};
279	19					31	my $data = $self->access($args)
280							; # have been already checked to be numerical if previously load()'ed
281	19	50				303	ref $data or croak __PACKAGE__, '::Data for counting up turns are needed';
282	19					17	my @data_u = ();
283	19					15	for my $i( 0 .. ( scalar @{$data} - 1 ) ) {
	19					41
284	731	100	100			1889	push @data_u, $data->[$i]
285							if not scalar @data_u
286							or $data->[$i] != $data_u[-1];
287							}
288	19					26	return \@data_u;
289							}
290
291							__END__