File Coverage

blib/lib/Statistics/Sequences/Turns.pm
Criterion Covered Total %
statement 66 71 92.9
branch 26 36 72.2
condition 11 14 78.5
subroutine 16 17 94.1
pod 9 9 100.0
total 128 147 87.0


line stmt bran cond sub pod time code
1             package Statistics::Sequences::Turns;
2 2     2   26012 use 5.008008;
  2         4  
3 2     2   7 use strict;
  2         2  
  2         31  
4 2     2   10 use warnings;
  2         5  
  2         44  
5 2     2   5 use Carp 'croak';
  2         3  
  2         120  
6 2     2   908 use Statistics::Sequences 0.14;
  2         44374  
  2         50  
7 2     2   10 use base qw(Statistics::Sequences);
  2         3  
  2         133  
8             $Statistics::Sequences::Turns::VERSION = '0.12';
9 2     2   978 use Statistics::Zed 0.10;
  2         14756  
  2         1368  
10            
11             =pod
12            
13             =head1 NAME
14            
15             Statistics::Sequences::Turns - Kendall's turning-points test - of peaks and troughs in a numerical sequence
16            
17             =head1 SYNOPSIS
18            
19             use strict;
20             use Statistics::Sequences::Turns 0.12;
21             my $turns = Statistics::Sequences::Turns->new();
22             $turns->load([2, 0, 8.5, 5, 3, 5.01, 2, 2, 3]); # numbers; or send as "data => $aref" with each stat call
23             my $val = $turns->observed(); # or descriptive methods: expected(), variance(), obsdev() and stdev()
24             $val = $turns->z_value(); # # or in list context get back both z- and p-value
25             $val = $turns->p_value(); # as above, assume data are loaded
26             my $href = $turns->stats_hash(values => {observed => 1, p_value => 1}, ccorr => 1); # incl. any other stat-method
27             $turns->dump(values => {observed => 1, expected => 1, p_value => 1}, ccorr => 1, flag => 1, precision_s => 3, precision_p => 7);
28             # prints: observed = 11.000, expected = 10.900, p_value = 0.5700167
29            
30             =head1 DESCRIPTION
31            
32             Implements Kendall's (1973) "turning point test" of sudden changes as peaks and troughs in the values of a numerical sequence. It is sometimes described as a test of "cyclicity", and often used as a test of randomness. However, it simply counts up the number of local maxima and minima within a sequence, regardless of their spacing and magnitude, and so does not indicate if the changes actually cycle between highs and lows, if they are more or less balanced in magnitude, or if any cycling is periodic. Kendall introduced this as a rough test of ups and downs in a sequence ahead of describing more sensitive tests based on autocorrelation and Fourier analysis.
33            
34             Specifically, for a sequence of numerical data (interval or ordinal) of size I, a count of turns is incremented if the value on trial I, for all I greater than zero and less than I, is, with respect to its immediate neighbours (the values on I - 1 and I + 1), a greater than both neighbours (a peak) or less than both neighbours (a trough). The difference of this observed number from the mean expected number of turns for a randomly generated sequence, taken as a unit of the standard deviation, gives a I-score for assessing the "randomness" of the sequence, i.e., the absence of a factor systematically affecting the frequency of peaks/troughs, given that, for turns, there is "a fairly rapid tendency of the distribution to normality" (Kendall 1973, p. 24).
35            
36             =head1 METHODS
37            
38             =head2 new
39            
40             $turns = Statistics::Sequences::Turns->new();
41            
42             Returns a new Turns object. Expects/accepts no arguments but the classname.
43            
44             =head2 load
45            
46             $turns->load(@data);
47             $turns->load(\@data);
48             $turns->load('sample1' => \@data); # labelled whatever
49            
50             Loads data anonymously or by name - see L in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here). Data must be numerical (ordinal, interval type). All elements must be numerical of the method croaks.
51            
52             =cut
53            
54             sub load {
55 2     2 1 259 my $self = shift;
56 2         8 $self->SUPER::load(@_);
57 2 50       254 croak __PACKAGE__, '::load All data must be numerical for turns statistics'
58             if !$self->all_numeric( $self->access( index => -1 ) );
59 2         643 return 1;
60             }
61            
62             =head2 add, read, unload
63            
64             See L for these additional operations on data that have been loaded.
65            
66             =head2 observed
67            
68             $v = $turns->observed(); # use anonymously loaded data
69             $v = $turns->observed(index => 1); # ... or give the required "index" for the loaded data
70             $v = $turns->observed(label => 'mysequence'); # ... or its "label" value
71             $v = $turns->observed(data => \@data); # ... or just give the data now
72            
73             Returns observed number of turns. This is the number of peaks and troughs, starting the count from index 1 of the sequence (a flat array), checking if both its immediate left/right (or past/future) neighbours are lesser than it (a peak) or greater than it (a trough). Wherever the values in successive indices in the sequence are equal, they are treated as a single observation/datum - so the following:
74            
75             0 0 1 1 0 1 1 1 0 1
76            
77             is counted up for turns as
78            
79             0 1 0 1 0 1
80             * * * *
81            
82             This shows four turns - two peaks (0 1 0) and two troughs (1 0 1).
83            
84             Returns 0 if the given list of is empty, or the number of its elements is less than 3.
85            
86             =cut
87            
88             sub observed {
89 8     8 1 468 my $self = shift;
90 8 100       15 my $args = ref $_[0] ? shift : {@_};
91 8         9 my $data = _set_data( $self, $args );
92 8         6 my $trials = scalar @{$data};
  8         7  
93 8 50 33     25 return 0 if not $trials or $trials < 3;
94 8         8 my ($count, $i) = (0);
95 8         13 for ($i = 1; $i < $trials - 1; $i++) {
96 276 100 100     942 if ( ( $data->[ $i - 1 ] > $data->[$i] )
    100 100        
97             && ( $data->[ $i + 1 ] > $data->[$i] ) )
98             { # trough at $i
99 88         112 $count++;
100             }
101             elsif (( $data->[ $i - 1 ] < $data->[$i] )
102             && ( $data->[ $i + 1 ] < $data->[$i] ) )
103             { # peak at $i
104 88         116 $count++;
105             }
106             }
107 8         25 return $count;
108             }
109             *turncount_observed = \&observed;
110             *tco = \&observed;
111            
112             =head2 expected
113            
114             $v = $turns->expected(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
115             $v = $turns->expected(data => \@data); # use these data
116             $v = $turns->expected(trials => 10); # don't use actual data; calculate from this number of trials
117            
118             Returns the expected number of turns, which is set by I the number of trials/observations/sample-size ...:
119            
120             =for html

  E[T] = 2 / 3 (N – 2)

121            
122             or, equivalently (in some sources),
123            
124             =for html

  E[T] = ( 2N – 4 ) / 3

125            
126             =cut
127            
128             sub expected {
129 7     7 1 386 my $self = shift;
130 7 50       14 my $args = ref $_[0] ? shift : {@_};
131             my $trials =
132             defined $args->{'trials'}
133             ? $args->{'trials'}
134 7 100       14 : scalar( @{ _set_data( $self, $args ) } );
  3         4  
135 7         21 return 2 / 3 * ( $trials - 2 );
136             #return (2 * $trials - 4) / 3;
137             }
138             *tce = \&expected;
139             *turncount_expected = \&expected;
140            
141             =head2 variance
142            
143             $v = $turns->variance(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
144             $v = $turns->variance(data => \@data); # use these data
145             $v = $turns->variance(trials => number); # don't use actual data; calculate from this number of trials
146            
147             Returns the expected variance in the number of turns for the given length of data I.
148            
149             =for html

  V[T] = (16N – 29 ) / 90

150            
151             =cut
152            
153             sub variance {
154 8     8 1 377 my $self = shift;
155 8 50       16 my $args = ref $_[0] ? shift : {@_};
156             my $trials =
157             defined $args->{'trials'}
158             ? $args->{'trials'}
159 8 100       12 : scalar( @{ _set_data( $self, $args ) } );
  4         6  
160 8         41 return ( 16 * $trials - 29 ) / 90;
161             }
162             *tcv = \&variance;
163             *turncount_variance = \&variance;
164            
165             =head2 obsdev
166            
167             $v = $turns->obsdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
168             $v = $turns->obsdev(data => \@data); # use these data
169            
170             Returns the observed deviation from expectation for the loaded/given sequence: observed I expected turn-count (I - I). Alias of C is supported.
171            
172             =cut
173            
174             sub obsdev {
175 1     1 1 183 return observed(@_) - expected(@_);
176             }
177             *observed_deviation = \&obsdev;
178            
179             =head2 stdev
180            
181             $v = $turns->stdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
182             $v = $turns->stdev(data => \@data);
183            
184             Returns square-root of the variance. Aliases C and C (common in other Statistics modules) are supported.
185            
186             =cut
187            
188             sub stdev {
189 2     2 1 366 return sqrt variance(@_);
190             }
191             *standard_deviation = \&stdev;
192             *stddev = \&stdev;
193            
194             =head2 z_value
195            
196             $z = $turns->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
197             $z = $turns->z_value(data => $aref, ccorr => 1);
198             ($z, $p) = $turns->z_value(data => $aref, ccorr => 1, tails => 2); # same but wanting an array, get the p-value too
199            
200             Returns the deviation ratio, or I-score, taking the turncount expected from that observed and dividing by the root variance, by default with a continuity correction in the numerator. Called in list context, returns the I-score with its normal distribution, two-tailed I

-value.

201            
202             The data to test can already have been Led, or sent directly as an aref keyed as B.
203            
204             Optional named arguments B (1 or 2), B (Boolean for the continuity-correction), B (for the statistic, i.e., I-score) and B (for the I

-value).

205            
206             The method can all be called with "sufficient" data: giving, instead of actual data, the B number of turns, and the number of B, the latter being sufficient to compute the expected number of turns and its variance.
207            
208             Alias C is supported.
209            
210             =cut
211            
212             sub z_value {
213 4     4 1 371 my $self = shift;
214 4 100       9 my $args = ref $_[0] ? shift : {@_};
215 4         5 my $data = _set_data( $self, $args );
216 4 50       8 my $trials = defined $args->{'trials'} ? $args->{'trials'} : scalar @{$data};
  4         4  
217 4         12 my $zed = Statistics::Zed->new();
218             my ( $zval, $pval ) = $zed->zscore(
219             observed => defined $args->{'observed'}
220             ? $args->{'observed'}
221             : $self->observed($args),
222             expected => $self->expected( trials => $trials ),
223             variance => $self->variance( trials => $trials ),
224             ccorr => defined $args->{'ccorr'} ? $args->{'ccorr'} : 1,
225             tails => $args->{'tails'} || 2,
226             precision_s => $args->{'precision_s'},
227 4 50 50     60 precision_p => $args->{'precision_p'},
    100          
228             );
229 4 100       411 return wantarray ? ( $zval, $pval ) : $zval;
230             }
231             *tzs = \&z_value;
232             *turncount_zscore = \&z_value;
233             *zscore = \&z_value;
234            
235             =head2 p_value
236            
237             $p = $turns->p_value(); # using loaded data and default args
238             $p = $turns->p_value(ccorr => 0|1, tails => 1|2); # normal-approximation based on loaded data
239             $p = $turns->p_value(data => $aref, ccorr => 1, tails => 2); # using given data (by-passing load and read)
240            
241             Returns the normal distribution I

-value for the deviation ratio (I-score) of the observed number of turns, 2-tailed and continuity-correct by default (or set B => 1 and B => 0, respectively). Other arguments are as for L.

242            
243             =cut
244            
245             sub p_value {
246 1     1 1 208 return ( z_value(@_) )[1];
247             }
248             *test = \&p_value;
249             *turns_test = \&p_value;
250             *tnt = \&p_value;
251            
252             =head2 stats_hash
253            
254             $href = $turns->stats_hash(values => {observed => 1, expected => 1, variance => 1, z_value => 1, p_value => 1}, ccorr => 1);
255            
256             Returns a hashref for the counts and stats as specified in its "values" argument, and with any options for calculating them. See L for details. If calling via a "turns" object, the option "stat => 'turns'" is not needed (unlike when using the parent "sequences" object).
257            
258             =head2 dump
259            
260             $turns->dump(flag => '1|0', text => '0|1|2');
261            
262             Print test results to STDOUT. See L in the Statistics::Sequences manpage for details.
263            
264             =cut
265            
266             sub dump {
267 0     0 1 0 my $self = shift;
268 0 0       0 my $args = ref $_[0] ? $_[0] : {@_};
269 0         0 $args->{'stat'} = 'turns';
270 0         0 $self->SUPER::dump($args);
271 0         0 return $self;
272             }
273            
274             sub _set_data
275             { # Get data via Statistics::Date
276             # Remove equivalent successors: e.g., strip 2nd 2 from (3, 2, 2, 7, 2):
277 19     19   15 my $self = shift;
278 19 50       27 my $args = ref $_[0] ? $_[0] : {@_};
279 19         31 my $data = $self->access($args)
280             ; # have been already checked to be numerical if previously load()'ed
281 19 50       303 ref $data or croak __PACKAGE__, '::Data for counting up turns are needed';
282 19         17 my @data_u = ();
283 19         15 for my $i( 0 .. ( scalar @{$data} - 1 ) ) {
  19         41  
284 731 100 100     1889 push @data_u, $data->[$i]
285             if not scalar @data_u
286             or $data->[$i] != $data_u[-1];
287             }
288 19         26 return \@data_u;
289             }
290            
291             __END__