File Coverage

blib/lib/Statistics/Sequences/Turns.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package Statistics::Sequences::Turns;
2              
3 1     1   24440 use 5.008008;
  1         5  
  1         44  
4 1     1   6 use strict;
  1         2  
  1         39  
5 1     1   6 use warnings;
  1         7  
  1         31  
6 1     1   5 use Carp 'croak';
  1         2  
  1         90  
7 1     1   7 use vars qw($VERSION @ISA);
  1         3  
  1         61  
8 1     1   511 use Statistics::Sequences 0.11;
  0            
  0            
9             @ISA = qw(Statistics::Sequences);
10             $VERSION = 0.11;
11             use Statistics::Zed 0.072;
12             our $zed = Statistics::Zed->new();
13              
14             =pod
15              
16             =head1 NAME
17              
18             Statistics::Sequences::Turns - Kendall's test for turning-points - peaks or troughs - in a numerical sequence
19              
20             =head1 SYNOPSIS
21              
22             use strict;
23             use Statistics::Sequences::Turns 0.11;
24             my $turns = Statistics::Sequences::Turns->new();
25             $turns->load([qw/2 0 8 5 3 5 2 3 1 1 9 4 4 1 5 5 6 5 8 7 5 3 8 5 6/]); # strings/numbers; or send as "data => $aref" with each stat call
26             my $val = $turns->observed(state => 5); # other methods include: expected(), variance(), obsdev() and stdev()
27             $val = $turns->zscore(state => 5, tails => 2, ccorr => 1); # # or want an array & get back both z- and p-value
28             $val = $turns->p_value(state => 5, tails => 1); # assuming data are loaded; alias: test()
29             my $href = $turns->stats_hash(values => {observed => 1, p_value => 1}, ccorr => 1); # include any other stat-method as needed
30             $turns->dump(values => {observed => 1, expected => 1, p_value => 1}, ccorr => 1, flag => 1, precision_s => 3, precision_p => 7);
31             # prints: observed = 11.000, expected = 10.900, p_value = 0.5700167
32              
33             =head1 DESCRIPTION
34              
35             For data of the continuous numerical type, a count of turns is incremented if the value on trial I<i>, for I<i> is greater than zero and less than I<n>, is, with respect to its neighbours, a peak (greater than both neighbours) or a trough (less than both neighbours). Comparing this count with the expected number of turns, and the expected variance of this count, for a randomly generated sequence completes the test.
36              
37             =head1 METHODS
38              
39             =head2 new
40              
41             $turns = Statistics::Sequences::Turns->new();
42              
43             Returns a new Turns object. Expects/accepts no arguments but the classname.
44              
45             =head2 load
46              
47             $turns->load(@data);
48             $turns->load(\@data);
49             $turns->load('sample1' => \@data); # labelled whatever
50              
51             Loads data anonymously or by name - see L<load|Statistics::Data/load, load_data> in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here). Data must be numerical (ordinal, interval type). All elements must be numerical of the method croaks.
52              
53             =cut
54              
55             sub load {
56             my $self = shift;
57             $self->SUPER::load(@_);
58             my $data = $self->read(index => -1);
59             croak __PACKAGE__, '::load All data must be numerical for turns statistics' if ! $self->all_numeric($data);
60             return 1;
61             }
62              
63             =head2 add, read, unload
64              
65             See L<Statistics::Data> for these additional operations on data that have been loaded.
66              
67             =head2 observed, turncount_observed, tco
68              
69             $v = $turns->observed(); # use anonymously loaded data
70             $v = $turns->observed(index => 1); # ... or give the required "index" for the loaded data
71             $v = $turns->observed(label => 'mysequence'); # ... or its "label" value
72             $v = $turns->observed(data => \@data); # ... or just give the data now
73              
74             Returns observed number of turns. This is the number of peaks and troughs, starting the count from index 1 of a flat array, checking if both its left/right (or past/future) neighbours are lesser than it (a peak) or greater than it (a trough). Wherever the values in successive indices of the list are equal, they are treated as a single observation/datum - so the following:
75              
76             0 0 1 1 0 1 1 1 0 1
77              
78             is counted up for turns as
79              
80             0 1 0 1 0 1
81             * * * *
82              
83             So there are four turns in this example - two peaks (0 1 0) and two troughs (1 0 1). (If repeated, this sequence would significantly deviate from expectation, I<p> = .035.)
84              
85             =cut
86              
87             sub observed {
88             my $self = shift;
89             my $args = ref $_[0] ? shift : {@_};
90             my $data = _set_data($self, $args);
91             my $num = scalar(@{$data});
92             return 0 if ! $num or $num < 3;
93             my ($count, $i) = (0);
94            
95             for ($i = 1; $i < $num - 1; $i++) {
96             if ( ($data->[$i - 1] > $data->[$i]) && ($data->[$i + 1] > $data->[$i]) ) { # trough at $i
97             $count++;
98             }
99             elsif ( ($data->[$i - 1] < $data->[$i]) && ($data->[$i + 1] < $data->[$i]) ) { # peak at $i
100             $count++;
101             }
102             }
103             return $count;
104             }
105             *turncount_observed = \&observed;
106             *tco = \&observed;
107              
108             =head2 expected, turncount_expected, tce
109              
110             $v = $turns->expected(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
111             $v = $turns->expected(data => \@data); # use these data
112             $v = $turns->expected(trials => 10); # don't use actual data; calculate from this number of trials
113              
114             Returns the expected number of turns, which is set by I<N> the number of trials/observations/sample-size ...:
115              
116             =for html <p>&nbsp;&nbsp;<i>E[T]</i> = 2 / 3 (<i>N</i> &ndash; 2)
117              
118             =cut
119            
120             sub expected {
121             my $self = shift;
122             my $args = ref $_[0] ? shift : {@_};
123             my $num = defined $args->{'trials'} ? $args->{'trials'} : scalar(@{_set_data($self, $args)});
124             return 2/3 * ($num - 2);
125             }
126             *tce = \&expected;
127             *turncount_expected = \&expected;
128              
129             =head2 variance, turncount_variance, tcv
130              
131             $v = $turns->variance(); # use first-loaded data; or specify by "index" or "label", or give it as "data" - see observed()
132             $v = $turns->variance(data => \@data); # use these data
133             $v = $turns->variance(trials => number); # don't use actual data; calculate from this number of trials
134              
135             Returns the expected variance in the number of turns for the given length of data I<N>.
136              
137             =for html <p>&nbsp;&nbsp;<i>V[T]</i> = (16<i>N</i> &ndash; 29 ) / 90
138              
139             =cut
140              
141             sub variance {
142             my $self = shift;
143             my $args = ref $_[0] ? shift : {@_};
144             my $num = defined $args->{'trials'} ? $args->{'trials'} : scalar(@{_set_data($self, $args)});
145             return (16 * $num - 29) / 90;
146             }
147             *tcv = \&variance;
148             *turncount_variance = \&variance;
149              
150             =head2 obsdev, observed_deviation
151              
152             $v = $turns->obsdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
153             $v = $turns->obsdev(data => \@data); # use these data
154              
155             Returns the deviation of (difference between) observed and expected turn-count for the loaded/given sequence (I<O> - I<E>).
156              
157             =cut
158              
159             sub obsdev {
160             return observed(@_) - expected(@_);
161             }
162             *observed_deviation = \&obsdev;
163              
164             =head2 stdev, standard_deviation
165              
166             $v = $turns->stdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
167             $v = $turns->stdev(data => \@data);
168              
169             Returns square-root of the variance.
170              
171             =cut
172              
173             sub stdev {
174             return sqrt(variance(@_));
175             }
176             *standard_deviation = \&stdev;
177              
178             =head2 z_value, zscore, turncount_zscore, tzs
179              
180             $v = $turns->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
181             $v = $turns->z_value(data => $aref, ccorr => 1);
182             ($zvalue, $pvalue) = $turns->z_value(data => $aref, ccorr => 1, tails => 2); # same but wanting an array, get the p-value too
183              
184             Returns the zscore from a test of turncount deviation, taking the turncount expected away from that observed and dividing by the root expected turncount variance, by default with a continuity correction in the numerator. Called wanting an array, returns the z-value with its p-value for the tails (1 or 2) given.
185              
186             =cut
187              
188             sub zscore {
189             my $self = shift;
190             my $args = ref $_[0] ? shift : {@_};
191             my $data = _set_data($self, $args);
192             my $num = scalar(@$data);
193             my ($zval, $pval) = $zed->zscore(
194             observed => defined $args->{'observed'} ? $args->{'observed'} : $self->tco($args),
195             expected => $self->tce(trials => $num),
196             variance => $self->tcv(trials => $num),
197             ccorr => defined $args->{'ccorr'} ? $args->{'ccorr'} : 1,
198             tails => $args->{'tails'} || 2,
199             precision_s => $args->{'precision_s'},
200             precision_p => $args->{'precision_p'},
201             );
202             return wantarray ? ($zval, $pval) : $zval;
203             }
204             *tzs = \&zscore;
205             *turncount_zscore = \&zscore;
206             *z_value = \&zscore;
207              
208             =head2 p_value, test, turns_test, tnt
209              
210             $p = $turns->p_value(); # using loaded data and default args
211             $p = $turns->p_value(ccorr => 0|1, tails => 1|2); # normal-approximation based on loaded data
212             $p = $turns->p_value(data => $aref, ccorr => 1, tails => 2); # using given data (by-passing load and read)
213              
214             Test the currently loaded data for significance of the number of turning-points by normal approximation. Note: for turns there is "a fairly rapid tendency of the distribution to normality" (Kendall 1973, p. 24).
215              
216             =cut
217              
218             sub p_value {
219             return (z_value(@_))[1];
220             }
221             *test = \&p_value;
222             *turns_test = \&p_value;
223             *tnt = \&p_value;
224              
225             =head2 dump
226              
227             $turns->dump(flag => '1|0', text => '0|1|2');
228              
229             Print test results to STDOUT. See L<dump|Statistics::Sequences/dump> in the Statistics::Sequences manpage for details.
230              
231             =cut
232              
233             sub dump {
234             my $self = shift;
235             my $args = ref $_[0] ? $_[0] : {@_};
236             $args->{'stat'} = 'turns';
237             $self->SUPER::dump($args);
238             return $self;
239             }
240              
241             sub _set_data {# Remove equivalent successors: e.g., strip 2nd 2 from (3, 2, 2, 7, 2):
242             my $self = shift;
243             my $args = ref $_[0] ? $_[0] : {@_};
244             my $data = $self->read($args); # have been already checked to be numerical if previously load()'ed
245             ref $data or croak __PACKAGE__, '::Data for counting up turns are needed';
246             my ($i, @data_u) = ();
247             for ($i = 0; $i < scalar(@{$data}); $i++) {
248             push @data_u, $data->[$i] if !scalar(@data_u) or $data->[$i] != $data_u[-1];
249             }
250             return \@data_u;
251             }
252              
253             __END__
254              
255             =head1 REFERENCES
256              
257             Kendall, M. G. (1973). I<Time-series>. London, UK: Griffin. [The test is described on pages 22-24; in the Example 2.1 for this test, the expected number of turns should be calculated with the value 52 (i.e., I<n> - 2), not 54.]
258              
259             =head1 SEE ALSO
260              
261             L<Statistics::Sequences|Statistics::Sequences> for other tests of sequences, and for sharing data between these tests.
262              
263             =head1 TO DO/BUGS
264              
265             Implementation of the serial test for non-overlapping I<v>-nomes.
266              
267             =head1 REVISION HISTORY
268              
269             See CHANGES in installation dist for revisions.
270              
271             =head1 AUTHOR/LICENSE
272              
273             =over 4
274              
275             =item Copyright (c) 2006-2013 Roderick Garton
276              
277             rgarton AT cpan DOT org
278              
279             This program is free software. It may be used, redistributed and/or modified under the same terms as Perl-5.6.1 (or later) (see L<http://www.perl.com/perl/misc/Artistic.html>).
280              
281             =back
282              
283             =head1 DISCLAIMER
284              
285             To the maximum extent permitted by applicable law, the author of this module disclaims all warranties, either express or implied, including but not limited to implied warranties of merchantability and fitness for a particular purpose, with regard to the software and the accompanying documentation.
286              
287             =cut