File Coverage

blib/lib/Statistics/Sequences/Turns.pm
Criterion Covered Total %
statement 63 68 92.6
branch 26 36 72.2
condition 11 14 78.5
subroutine 15 16 93.7
pod 9 9 100.0
total 124 143 86.7


line stmt bran cond sub pod time code
1             package Statistics::Sequences::Turns;
2 2     2   27270 use 5.008008;
  2         5  
3 2     2   8 use strict;
  2         2  
  2         36  
4 2     2   7 use warnings;
  2         9  
  2         50  
5 2     2   6 use Carp 'croak';
  2         2  
  2         112  
6 2     2   6 use base qw(Statistics::Sequences);
  2         3  
  2         873  
7             $Statistics::Sequences::Turns::VERSION = '0.13';
8 2     2   56200 use Statistics::Zed 0.10;
  2         12481  
  2         1229  
9            
10             =pod
11            
12             =head1 NAME
13            
14             Statistics::Sequences::Turns - Kendall's turning-points test - of peaks and troughs in a numerical sequence
15            
16             =head1 VERSION
17            
18             This is documentation for B of Statistics::Sequences::Turns.
19            
20             =head1 SYNOPSIS
21            
22             use strict;
23             use Statistics::Sequences::Turns 0.13;
24             my $turns = Statistics::Sequences::Turns->new();
25             $turns->load([2, 0, 8.5, 5, 3, 5.01, 2, 2, 3]); # numbers; or give as "data => $aref" with each stat call
26             my $val = $turns->observed(); # or descriptive methods: expected(), variance(), obsdev() and stdev()
27             $val = $turns->z_value(); # # or in list context get both z- and p-value
28             $val = $turns->p_value(); # as above, assume data are loaded
29             my $href = $turns->stats_hash(values => [qw/observed p_value/], ccorr => 1); # incl. any other stat-method
30             $turns->dump(values => [qw/observed expected p_value/], ccorr => 1, flag => 1, precision_s => 3, precision_p => 7);
31             # prints: observed = 11.000, expected = 10.900, p_value = 0.5700167
32            
33             =head1 DESCRIPTION
34            
35             Implements Kendall's (1973) "turning point test" of sudden changes as peaks and troughs in the values of a numerical sequence. It is sometimes described as a test of "cyclicity", and often used as a test of randomness. Kendall (1973) introduced this as a test of ups and downs relative to linear progressions in a sequence (ahead of describing tests based on autocorrelation and Fourier analysis).
36            
37             Specifically, for a sequence of numerical data (interval or ordinal) of size I, a count of turns is incremented if the value on trial I, for all I greater than zero and less than I, is, with respect to its immediate neighbours (the values on I - 1 and I + 1), greater than both neighbours (a peak) or less than both neighbours (a trough). The difference of this observed number from the mean expected number of turns for a randomly generated sequence, taken as a unit of the standard deviation, gives a I-score for assessing the "randomness" of the sequence, i.e., the absence of a factor systematically affecting the frequency of peaks/troughs, given that, for turns, there is "a fairly rapid tendency of the distribution to normality" (Kendall 1973, p. 24).
38            
39             With these local fluctuations tested regardless of their spacing and magnitude, the test does not indicate if the changes actually cycle between highs and lows, if they are more or less balanced in magnitude, or if any cycling is periodic; only if oscillation in general is more common than linear progression.
40            
41             =head1 METHODS
42            
43             =head2 new
44            
45             $turns = Statistics::Sequences::Turns->new();
46            
47             Returns a new Turns object. Expects/accepts no arguments but the classname.
48            
49             =head2 load
50            
51             $turns->load(@data);
52             $turns->load(\@data);
53             $turns->load('foodat' => \@data); # labelled whatever
54            
55             Loads data anonymously or by name - see L in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here). Data must be numerical (ordinal, interval type). All elements must be numerical of the method croaks.
56            
57             =cut
58            
59             sub load {
60 2     2 1 262 my $self = shift;
61 2         10 $self->SUPER::load(@_);
62 2 50       292 croak __PACKAGE__, '::load All data must be numerical for turns statistics'
63             if !$self->all_numeric( $self->access( index => -1 ) );
64 2         315 return 1;
65             }
66            
67             =head2 add, read, unload
68            
69             See L for these additional operations on data that have been loaded.
70            
71             =head2 observed
72            
73             $v = $turns->observed(); # use anonymously loaded data
74             $v = $turns->observed(name => 'mysequence'); # ... or by "name" given on loading
75             $v = $turns->observed(data => \@data); # ... or just give the data now
76            
77             Returns observed number of turns. This is the number of peaks and troughs, starting the count from index 1 of the sequence (a flat array), checking if both its immediate left/right (or past/future) neighbours are lesser than it (a peak) or greater than it (a trough). Wherever the values in successive indices in the sequence are equal, they are treated as a single observation/datum - so the following:
78            
79             0 0 1 1 0 1 1 1 0 1
80            
81             is counted up for turns as
82            
83             0 1 0 1 0 1
84             * * * *
85            
86             This shows four turns - two peaks (0 1 0) and two troughs (1 0 1).
87            
88             Returns 0 if the given list of is empty, or the number of its elements is less than 3.
89            
90             =cut
91            
92             sub observed {
93 8     8 1 319 my $self = shift;
94 8 100       17 my $args = ref $_[0] ? shift : {@_};
95 8         16 my $data = _set_data( $self, $args );
96 8         5 my $trials = scalar @{$data};
  8         10  
97 8 50 33     25 return 0 if not $trials or $trials < 3;
98 8         11 my ( $count, $i ) = (0);
99 8         17 for ( $i = 1 ; $i < $trials - 1 ; $i++ ) {
100 276 100 100     909 if ( ( $data->[ $i - 1 ] > $data->[$i] )
    100 100        
101             && ( $data->[ $i + 1 ] > $data->[$i] ) )
102             { # trough at $i
103 88         104 $count++;
104             }
105             elsif (( $data->[ $i - 1 ] < $data->[$i] )
106             && ( $data->[ $i + 1 ] < $data->[$i] ) )
107             { # peak at $i
108 88         120 $count++;
109             }
110             }
111 8         27 return $count;
112             }
113            
114             =head2 expected
115            
116             $v = $turns->expected(); # use loaded data; or specify by "name"
117             $v = $turns->expected(data => \@data); # use these data
118             $v = $turns->expected(trials => POS_INT); # don't use actual data; calculate from this number of trials
119            
120             Returns the expected number of turns, which is set by I the number of trials/observations/sample-size ...:
121            
122             =for html

  E[T] = 2 / 3 (N – 2)

123            
124             or, equivalently (in some sources),
125            
126             =for html

  E[T] = ( 2N – 4 ) / 3

127            
128             =cut
129            
130             sub expected {
131 7     7 1 398 my $self = shift;
132 7 50       19 my $args = ref $_[0] ? shift : {@_};
133             my $trials =
134             defined $args->{'trials'}
135             ? $args->{'trials'}
136 7 100       13 : scalar( @{ _set_data( $self, $args ) } );
  3         8  
137 7         25 return 2 / 3 * ( $trials - 2 );
138            
139             #return (2 * $trials - 4) / 3;
140             }
141            
142             =head2 variance
143            
144             $v = $turns->variance(); # use loaded data; or specify by "name"
145             $v = $turns->variance(data => \@data); # use these data
146             $v = $turns->variance(trials => POS_INT); # don't use actual data; calculate from this number of trials
147            
148             Returns the expected variance in the number of turns for the given length of data I.
149            
150             =for html

  V[T] = (16N – 29 ) / 90

151            
152             =cut
153            
154             sub variance {
155 8     8 1 416 my $self = shift;
156 8 50       20 my $args = ref $_[0] ? shift : {@_};
157             my $trials =
158             defined $args->{'trials'}
159             ? $args->{'trials'}
160 8 100       15 : scalar( @{ _set_data( $self, $args ) } );
  4         9  
161 8         52 return ( 16 * $trials - 29 ) / 90;
162             }
163            
164             =head2 obsdev
165            
166             $v = $turns->obsdev(); # use data already loaded - anonymously; or specify its "name"
167             $v = $turns->obsdev(data => \@data); # use these data
168            
169             Returns the observed deviation from expectation for the loaded/given sequence: observed I expected turn-count (I - I). Alias of C is supported.
170            
171             =cut
172            
173             sub obsdev {
174 1     1 1 200 return observed(@_) - expected(@_);
175             }
176             *observed_deviation = \&obsdev;
177            
178             =head2 stdev
179            
180             $v = $turns->stdev(); # use data already loaded - anonymously; or specify its "name"
181             $v = $turns->stdev(data => \@data);
182            
183             Returns square-root of the variance. Aliases C and C (common in other Statistics modules) are supported.
184            
185             =cut
186            
187             sub stdev {
188 2     2 1 434 return sqrt variance(@_);
189             }
190             *standard_deviation = \&stdev;
191             *stddev = \&stdev;
192            
193             =head2 z_value
194            
195             $z = $turns->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "name"
196             $z = $turns->z_value(data => $aref, ccorr => BOOL);
197             ($z, $p) = $turns->z_value(data => $aref, ccorr => BOOL, tails => 2); # same but wanting an array, get the p-value too
198            
199             Returns the deviation ratio, or I-score, taking the turncount expected from that observed and dividing by the root variance, by default with a continuity correction in the numerator. Called in list context, returns the I-score with its normal distribution, two-tailed I

-value.

200            
201             The data to test can already have been Led, or sent directly as an aref keyed as B.
202            
203             Optional named arguments B (1 or 2), B (Boolean for the continuity-correction), B (for the statistic, i.e., I-score) and B (for the I

-value).

204            
205             The method can all be called with "sufficient" data: giving, instead of actual data, the B number of turns, and the number of B, the latter being sufficient to compute the expected number of turns and its variance.
206            
207             Alias C is supported.
208            
209             =cut
210            
211             sub z_value {
212 4     4 1 659 my $self = shift;
213 4 100       13 my $args = ref $_[0] ? shift : {@_};
214 4         9 my $data = _set_data( $self, $args );
215             my $trials =
216 4 50       11 defined $args->{'trials'} ? $args->{'trials'} : scalar @{$data};
  4         5  
217 4         19 my $zed = Statistics::Zed->new();
218             my ( $zval, $pval ) = $zed->zscore(
219             observed => defined $args->{'observed'}
220             ? $args->{'observed'}
221             : $self->observed($args),
222             expected => $self->expected( trials => $trials ),
223             variance => $self->variance( trials => $trials ),
224             ccorr => defined $args->{'ccorr'} ? $args->{'ccorr'} : 1,
225             tails => $args->{'tails'} || 2,
226             precision_s => $args->{'precision_s'},
227 4 50 50     88 precision_p => $args->{'precision_p'},
    100          
228             );
229 4 100       430 return wantarray ? ( $zval, $pval ) : $zval;
230             }
231             *z_score = \&z_value;
232            
233             =head2 p_value
234            
235             $p = $turns->p_value(); # using loaded data and default args
236             $p = $turns->p_value(ccorr => BOOL, tails => 1|2); # normal-approximation based on loaded data
237             $p = $turns->p_value(data => $aref, ccorr => BOOL, tails => 2); # using given data
238            
239             Returns the normal distribution I

-value for the deviation ratio (I-score) of the observed number of turns, 2-tailed and continuity-correct by default (or set B => 1 and B => 0, respectively). Other arguments are as for L.

240            
241             =cut
242            
243             sub p_value {
244 1     1 1 202 return ( z_value(@_) )[1];
245             }
246            
247             =head2 stats_hash
248            
249             $href = $turns->stats_hash(values => [qw/observed expected variance z_value p_value/], ccorr => 1);
250            
251             Returns a hashref for the counts and stats as specified in its "values" argument, and with any options for calculating them. See L in the Statistics::Sequences manpage for details/options. If calling via a "turns" object, the option "stat => 'turns'" is not needed (unlike when using the parent Statistics::Sequences object).
252            
253             =head2 dump
254            
255             $turns->dump(flag => BOOL, verbose => BOOL, format => 'table|labline|csv');
256            
257             Print test results to STDOUT. See L in the Statistics::Sequences manpage for details/options.
258            
259             =cut
260            
261             sub dump {
262 0     0 1 0 my $self = shift;
263 0 0       0 my $args = ref $_[0] ? $_[0] : {@_};
264 0         0 $args->{'stat'} = 'turns';
265 0         0 $self->SUPER::dump($args);
266 0         0 return $self;
267             }
268            
269             sub _set_data { # Get data via Statistics::Date
270             # Remove equivalent successors: e.g., strip 2nd 2 from (3, 2, 2, 7, 2):
271 19     19   18 my $self = shift;
272 19 50       39 my $args = ref $_[0] ? $_[0] : {@_};
273 19         50 my $data = $self->access($args)
274             ; # have been already checked to be numerical if previously load()'ed
275 19 50       563 ref $data or croak __PACKAGE__, '::Data for counting up turns are needed';
276 19         21 my @data_u = ();
277 19         23 for my $i ( 0 .. ( scalar @{$data} - 1 ) ) {
  19         42  
278 731 100 100     2424 push @data_u, $data->[$i]
279             if not scalar @data_u
280             or $data->[$i] != $data_u[-1];
281             }
282 19         40 return \@data_u;
283             }
284            
285             __END__