line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::Sequences::Turns; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
24440
|
use 5.008008; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
44
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
39
|
|
5
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
31
|
|
6
|
1
|
|
|
1
|
|
5
|
use Carp 'croak'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
90
|
|
7
|
1
|
|
|
1
|
|
7
|
use vars qw($VERSION @ISA); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
61
|
|
8
|
1
|
|
|
1
|
|
511
|
use Statistics::Sequences 0.11; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
@ISA = qw(Statistics::Sequences); |
10
|
|
|
|
|
|
|
$VERSION = 0.11; |
11
|
|
|
|
|
|
|
use Statistics::Zed 0.072; |
12
|
|
|
|
|
|
|
our $zed = Statistics::Zed->new(); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=pod |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Statistics::Sequences::Turns - Kendall's test for turning-points - peaks or troughs - in a numerical sequence |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use strict; |
23
|
|
|
|
|
|
|
use Statistics::Sequences::Turns 0.11; |
24
|
|
|
|
|
|
|
my $turns = Statistics::Sequences::Turns->new(); |
25
|
|
|
|
|
|
|
$turns->load([qw/2 0 8 5 3 5 2 3 1 1 9 4 4 1 5 5 6 5 8 7 5 3 8 5 6/]); # strings/numbers; or send as "data => $aref" with each stat call |
26
|
|
|
|
|
|
|
my $val = $turns->observed(state => 5); # other methods include: expected(), variance(), obsdev() and stdev() |
27
|
|
|
|
|
|
|
$val = $turns->zscore(state => 5, tails => 2, ccorr => 1); # # or want an array & get back both z- and p-value |
28
|
|
|
|
|
|
|
$val = $turns->p_value(state => 5, tails => 1); # assuming data are loaded; alias: test() |
29
|
|
|
|
|
|
|
my $href = $turns->stats_hash(values => {observed => 1, p_value => 1}, ccorr => 1); # include any other stat-method as needed |
30
|
|
|
|
|
|
|
$turns->dump(values => {observed => 1, expected => 1, p_value => 1}, ccorr => 1, flag => 1, precision_s => 3, precision_p => 7); |
31
|
|
|
|
|
|
|
# prints: observed = 11.000, expected = 10.900, p_value = 0.5700167 |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 DESCRIPTION |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
For data of the continuous numerical type, a count of turns is incremented if the value on trial I<i>, for I<i> is greater than zero and less than I<n>, is, with respect to its neighbours, a peak (greater than both neighbours) or a trough (less than both neighbours). Comparing this count with the expected number of turns, and the expected variance of this count, for a randomly generated sequence completes the test. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 METHODS |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head2 new |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
$turns = Statistics::Sequences::Turns->new(); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Returns a new Turns object. Expects/accepts no arguments but the classname. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head2 load |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$turns->load(@data); |
48
|
|
|
|
|
|
|
$turns->load(\@data); |
49
|
|
|
|
|
|
|
$turns->load('sample1' => \@data); # labelled whatever |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
Loads data anonymously or by name - see L<load|Statistics::Data/load, load_data> in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here). Data must be numerical (ordinal, interval type). All elements must be numerical of the method croaks. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=cut |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub load { |
56
|
|
|
|
|
|
|
my $self = shift; |
57
|
|
|
|
|
|
|
$self->SUPER::load(@_); |
58
|
|
|
|
|
|
|
my $data = $self->read(index => -1); |
59
|
|
|
|
|
|
|
croak __PACKAGE__, '::load All data must be numerical for turns statistics' if ! $self->all_numeric($data); |
60
|
|
|
|
|
|
|
return 1; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 add, read, unload |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
See L<Statistics::Data> for these additional operations on data that have been loaded. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head2 observed, turncount_observed, tco |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
$v = $turns->observed(); # use anonymously loaded data |
70
|
|
|
|
|
|
|
$v = $turns->observed(index => 1); # ... or give the required "index" for the loaded data |
71
|
|
|
|
|
|
|
$v = $turns->observed(label => 'mysequence'); # ... or its "label" value |
72
|
|
|
|
|
|
|
$v = $turns->observed(data => \@data); # ... or just give the data now |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Returns observed number of turns. This is the number of peaks and troughs, starting the count from index 1 of a flat array, checking if both its left/right (or past/future) neighbours are lesser than it (a peak) or greater than it (a trough). Wherever the values in successive indices of the list are equal, they are treated as a single observation/datum - so the following: |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
0 0 1 1 0 1 1 1 0 1 |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
is counted up for turns as |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
0 1 0 1 0 1 |
81
|
|
|
|
|
|
|
* * * * |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
So there are four turns in this example - two peaks (0 1 0) and two troughs (1 0 1). (If repeated, this sequence would significantly deviate from expectation, I<p> = .035.) |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
sub observed { |
88
|
|
|
|
|
|
|
my $self = shift; |
89
|
|
|
|
|
|
|
my $args = ref $_[0] ? shift : {@_}; |
90
|
|
|
|
|
|
|
my $data = _set_data($self, $args); |
91
|
|
|
|
|
|
|
my $num = scalar(@{$data}); |
92
|
|
|
|
|
|
|
return 0 if ! $num or $num < 3; |
93
|
|
|
|
|
|
|
my ($count, $i) = (0); |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
for ($i = 1; $i < $num - 1; $i++) { |
96
|
|
|
|
|
|
|
if ( ($data->[$i - 1] > $data->[$i]) && ($data->[$i + 1] > $data->[$i]) ) { # trough at $i |
97
|
|
|
|
|
|
|
$count++; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
elsif ( ($data->[$i - 1] < $data->[$i]) && ($data->[$i + 1] < $data->[$i]) ) { # peak at $i |
100
|
|
|
|
|
|
|
$count++; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
return $count; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
*turncount_observed = \&observed; |
106
|
|
|
|
|
|
|
*tco = \&observed; |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 expected, turncount_expected, tce |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
$v = $turns->expected(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed() |
111
|
|
|
|
|
|
|
$v = $turns->expected(data => \@data); # use these data |
112
|
|
|
|
|
|
|
$v = $turns->expected(trials => 10); # don't use actual data; calculate from this number of trials |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Returns the expected number of turns, which is set by I<N> the number of trials/observations/sample-size ...: |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=for html <p> <i>E[T]</i> = 2 / 3 (<i>N</i> – 2) |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub expected { |
121
|
|
|
|
|
|
|
my $self = shift; |
122
|
|
|
|
|
|
|
my $args = ref $_[0] ? shift : {@_}; |
123
|
|
|
|
|
|
|
my $num = defined $args->{'trials'} ? $args->{'trials'} : scalar(@{_set_data($self, $args)}); |
124
|
|
|
|
|
|
|
return 2/3 * ($num - 2); |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
*tce = \&expected; |
127
|
|
|
|
|
|
|
*turncount_expected = \&expected; |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 variance, turncount_variance, tcv |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
$v = $turns->variance(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed() |
132
|
|
|
|
|
|
|
$v = $turns->variance(data => \@data); # use these data |
133
|
|
|
|
|
|
|
$v = $turns->variance(trials => number); # don't use actual data; calculate from this number of trials |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Returns the expected variance in the number of turns for the given length of data I<N>. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=for html <p> <i>V[T]</i> = (16<i>N</i> – 29 ) / 90 |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub variance { |
142
|
|
|
|
|
|
|
my $self = shift; |
143
|
|
|
|
|
|
|
my $args = ref $_[0] ? shift : {@_}; |
144
|
|
|
|
|
|
|
my $num = defined $args->{'trials'} ? $args->{'trials'} : scalar(@{_set_data($self, $args)}); |
145
|
|
|
|
|
|
|
return (16 * $num - 29) / 90; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
*tcv = \&variance; |
148
|
|
|
|
|
|
|
*turncount_variance = \&variance; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head2 obsdev, observed_deviation |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
$v = $turns->obsdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed() |
153
|
|
|
|
|
|
|
$v = $turns->obsdev(data => \@data); # use these data |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Returns the deviation of (difference between) observed and expected turn-count for the loaded/given sequence (I<O> - I<E>). |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub obsdev { |
160
|
|
|
|
|
|
|
return observed(@_) - expected(@_); |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
*observed_deviation = \&obsdev; |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 stdev, standard_deviation |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
$v = $turns->stdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed() |
167
|
|
|
|
|
|
|
$v = $turns->stdev(data => \@data); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Returns square-root of the variance. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub stdev { |
174
|
|
|
|
|
|
|
return sqrt(variance(@_)); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
*standard_deviation = \&stdev; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 z_value, zscore, turncount_zscore, tzs |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
$v = $turns->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "label" or "index" - see observed() |
181
|
|
|
|
|
|
|
$v = $turns->z_value(data => $aref, ccorr => 1); |
182
|
|
|
|
|
|
|
($zvalue, $pvalue) = $turns->z_value(data => $aref, ccorr => 1, tails => 2); # same but wanting an array, get the p-value too |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Returns the zscore from a test of turncount deviation, taking the turncount expected away from that observed and dividing by the root expected turncount variance, by default with a continuity correction in the numerator. Called wanting an array, returns the z-value with its p-value for the tails (1 or 2) given. |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=cut |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub zscore { |
189
|
|
|
|
|
|
|
my $self = shift; |
190
|
|
|
|
|
|
|
my $args = ref $_[0] ? shift : {@_}; |
191
|
|
|
|
|
|
|
my $data = _set_data($self, $args); |
192
|
|
|
|
|
|
|
my $num = scalar(@$data); |
193
|
|
|
|
|
|
|
my ($zval, $pval) = $zed->zscore( |
194
|
|
|
|
|
|
|
observed => defined $args->{'observed'} ? $args->{'observed'} : $self->tco($args), |
195
|
|
|
|
|
|
|
expected => $self->tce(trials => $num), |
196
|
|
|
|
|
|
|
variance => $self->tcv(trials => $num), |
197
|
|
|
|
|
|
|
ccorr => defined $args->{'ccorr'} ? $args->{'ccorr'} : 1, |
198
|
|
|
|
|
|
|
tails => $args->{'tails'} || 2, |
199
|
|
|
|
|
|
|
precision_s => $args->{'precision_s'}, |
200
|
|
|
|
|
|
|
precision_p => $args->{'precision_p'}, |
201
|
|
|
|
|
|
|
); |
202
|
|
|
|
|
|
|
return wantarray ? ($zval, $pval) : $zval; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
*tzs = \&zscore; |
205
|
|
|
|
|
|
|
*turncount_zscore = \&zscore; |
206
|
|
|
|
|
|
|
*z_value = \&zscore; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=head2 p_value, test, turns_test, tnt |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
$p = $turns->p_value(); # using loaded data and default args |
211
|
|
|
|
|
|
|
$p = $turns->p_value(ccorr => 0|1, tails => 1|2); # normal-approximation based on loaded data |
212
|
|
|
|
|
|
|
$p = $turns->p_value(data => $aref, ccorr => 1, tails => 2); # using given data (by-passing load and read) |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Test the currently loaded data for significance of the number of turning-points by normal approximation. Note: for turns there is "a fairly rapid tendency of the distribution to normality" (Kendall 1973, p. 24). |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=cut |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub p_value { |
219
|
|
|
|
|
|
|
return (z_value(@_))[1]; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
*test = \&p_value; |
222
|
|
|
|
|
|
|
*turns_test = \&p_value; |
223
|
|
|
|
|
|
|
*tnt = \&p_value; |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head2 dump |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
$turns->dump(flag => '1|0', text => '0|1|2'); |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Print test results to STDOUT. See L<dump|Statistics::Sequences/dump> in the Statistics::Sequences manpage for details. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=cut |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub dump { |
234
|
|
|
|
|
|
|
my $self = shift; |
235
|
|
|
|
|
|
|
my $args = ref $_[0] ? $_[0] : {@_}; |
236
|
|
|
|
|
|
|
$args->{'stat'} = 'turns'; |
237
|
|
|
|
|
|
|
$self->SUPER::dump($args); |
238
|
|
|
|
|
|
|
return $self; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub _set_data {# Remove equivalent successors: e.g., strip 2nd 2 from (3, 2, 2, 7, 2): |
242
|
|
|
|
|
|
|
my $self = shift; |
243
|
|
|
|
|
|
|
my $args = ref $_[0] ? $_[0] : {@_}; |
244
|
|
|
|
|
|
|
my $data = $self->read($args); # have been already checked to be numerical if previously load()'ed |
245
|
|
|
|
|
|
|
ref $data or croak __PACKAGE__, '::Data for counting up turns are needed'; |
246
|
|
|
|
|
|
|
my ($i, @data_u) = (); |
247
|
|
|
|
|
|
|
for ($i = 0; $i < scalar(@{$data}); $i++) { |
248
|
|
|
|
|
|
|
push @data_u, $data->[$i] if !scalar(@data_u) or $data->[$i] != $data_u[-1]; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
return \@data_u; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
__END__ |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head1 REFERENCES |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
Kendall, M. G. (1973). I<Time-series>. London, UK: Griffin. [The test is described on pages 22-24; in the Example 2.1 for this test, the expected number of turns should be calculated with the value 52 (i.e., I<n> - 2), not 54.] |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=head1 SEE ALSO |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
L<Statistics::Sequences|Statistics::Sequences> for other tests of sequences, and for sharing data between these tests. |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head1 TO DO/BUGS |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Implementation of the serial test for non-overlapping I<v>-nomes. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head1 REVISION HISTORY |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
See CHANGES in installation dist for revisions. |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head1 AUTHOR/LICENSE |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=over 4 |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=item Copyright (c) 2006-2013 Roderick Garton |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
rgarton AT cpan DOT org |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
This program is free software. It may be used, redistributed and/or modified under the same terms as Perl-5.6.1 (or later) (see L<http://www.perl.com/perl/misc/Artistic.html>). |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=back |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=head1 DISCLAIMER |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
To the maximum extent permitted by applicable law, the author of this module disclaims all warranties, either express or implied, including but not limited to implied warranties of merchantability and fitness for a particular purpose, with regard to the software and the accompanying documentation. |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=cut |