File Coverage

blib/lib/Statistics/Sequences.pm
Criterion Covered Total %
statement 30 151 19.8
branch 0 64 0.0
condition 0 4 0.0
subroutine 10 24 41.6
pod 8 9 88.8
total 48 252 19.0


line stmt bran cond sub pod time code
1             package Statistics::Sequences;
2 2     2   27514 use strict;
  2         3  
  2         54  
3 2     2   7 use warnings FATAL => 'all';
  2         2  
  2         81  
4 2     2   7 use Carp qw(croak);
  2         9  
  2         121  
5 2     2   943 use Statistics::Data 0.11;
  2         45633  
  2         60  
6 2     2   13 use base qw(Statistics::Data);
  2         2  
  2         129  
7 2     2   9 use Scalar::Util qw(looks_like_number);
  2         2  
  2         79  
8 2     2   917 use Statistics::Lite qw(max);
  2         2067  
  2         105  
9 2     2   741 use String::Numeric qw(is_int);
  2         4004  
  2         591  
10             $Statistics::Sequences::VERSION = '0.15';
11            
12             =pod
13            
14             =head1 NAME
15            
16             Statistics::Sequences - Common methods/interface for sub-module sequential tests (of Runs, Joins, Pot, etc.)
17            
18             =head1 VERSION
19            
20             This is documentation for Version 0.15 of Statistics::Sequences.
21            
22             =head1 SYNOPSIS
23            
24             use Statistics::Sequences 0.15;
25             $seq = Statistics::Sequences->new();
26             my @data = (1, 'a', 'a', 1); # ordered list
27             $seq->load(\@data); # or @data or 'name' => \@data
28             print $seq->observed(stat => 'runs'); # assuming sub-module Runs.pm is installed
29             print $seq->test(stat => 'vnomes', length => 2); # assuming sub-module Vnomes.pm is installed
30             $seq->dump(stat => 'runs', values => [qw/observed z_value p_value/], exact => 1, tails => 1);
31             # see also Statistics::Data for inherited methods
32            
33             =head1 DESCRIPTION
34            
35             This module provides methods for loading, updating and accessing data as ordered list of scalar values (numbers, strings) for statistical tests of their sequential properties via sub-modules including L, L, L, L and L. None of these sub-modules are installed by default.
36            
37             It also provides a common interface to access the statistical values returned by these tests, so that several tests can be performed on the same data, with the same class object. Alternatively, L each sub-module directly.
38            
39             =head1 SUBROUTINES/METHODS
40            
41             =head2 new
42            
43             $seq = Statistics::Sequences->new();
44            
45             Returns a new Statistics::Sequences object (inherited from L) by which all the methods for caching, reading and testing data can be accessed, including each of the methods for performing the L, L, L, L or Ltests.
46            
47             Sub-packages also have their own new method - so, e.g., L, can be individually imported, and its own L method can be called, e.g.:
48            
49             use Statistics::Sequences::Runs;
50             $runs = Statistics::Sequences::Runs->new();
51            
52             In this case, data are not automatically shared across packages, and only one test (in this case, the Runs-test) can be accessed through the class-object.
53            
54             =head2 load, add, access, unload
55            
56             All these operations on the basic data are inherited from L - see this doc for details of these and other possible methods.
57            
58             =head2 observed
59            
60             $v = $seq->observed(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat
61             $v = $seq->observed(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats
62             $v = $seq->observed(stat => 'joins|pot|runs|turns|vnomes', label => 'myLabelledLoadedData'); # just needs args for partic.stats
63            
64             If this method is defined by the sub-module named in the argument B, returns the observed value of the statistic for the Led data, or data sent with this call, eg., how many runs in the sequence (1, 1, 0, 1). See the particular statistic's manpage for any other arguments needed or optional.
65            
66             =cut
67            
68 0     0 1   sub observed { return _feed( 'observed', @_ ); }
69             *observation = \&observed;
70            
71             =head2 expected
72            
73             $v = $seq->expected(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat
74             $v = $seq->expected(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats
75            
76             If this method is defined by the sub-module named in the argument B, returns the expected value of the statistic for the Led data, or data sent with this call, eg., how many runs should occur in a 4-length sequence of two possible events. See the statistic's manpage for any other arguments needed or optional.
77            
78             =cut
79            
80 0     0 1   sub expected { return _feed( 'expected', @_ ); }
81             *expectation = \&expected;
82            
83             =head2 variance
84            
85             $seq->variance(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat
86             $seq->variance(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats
87            
88             Returns the expected range of deviation in the statistic's observed value for the given number of trials, if this method is defined by the sub-module named in the argument B.
89            
90             =cut
91            
92 0     0 1   sub variance { return _feed( 'variance', @_ ); }
93            
94             =head2 obsdev
95            
96             $v = $seq->obsdev(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat
97             $v = $seq->obsdev(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats
98            
99             Returns the deviation of (difference between) observed and expected values of the statistic for the loaded/given sequence (I - I); if this method is defined by the sub-module named in the argument B.
100            
101             =cut
102            
103             sub obsdev {
104 0     0 1   return observed(@_) - expected(@_);
105             }
106             *observed_deviation = \&obsdev;
107            
108             =head2 stdev
109            
110             $v = $seq->stdev(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat
111             $v = $seq->stdev(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats
112            
113             Returns square-root of the variance, if this method is defined by the sub-module named in the argument B.
114            
115             =cut
116            
117             sub stdev {
118 0     0 1   return sqrt variance(@_);
119             }
120             *standard_deviation = \&stdev;
121            
122             =head2 z_value
123            
124             $v = $seq->z_value(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat
125             $v = $seq->z_value(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats
126            
127             Return the deviation ratio: observed deviation to standard deviation. Use argument B for continuity correction.
128            
129             =cut
130            
131 0     0 0   sub zscore { return _feed( 'zscore', @_ ); }
132             *z_value = \&zscore;
133            
134             =head2 p_value
135            
136             $p = $seq->p_value(stat => 'runs'); # same for 'joins', 'turns'
137             $p = $seq->p_value(stat => 'pot', state => 'a value appearing in the data');
138             $p = $seq->p_value(stat => 'vnomes', length => 'an integer greater than zero and less than sample-size');
139            
140             Returns the probability of observing so many runs, joins, etc., according to whatever such method is defined by the sub-module named in the argument B.
141            
142             =cut
143            
144 0     0 1   sub p_value { return _feed( 'p_value', @_ ); }
145             *test = \&p_value;
146            
147             =head2 stats_hash
148            
149             $href = $seq->stats_hash(values => [qw/observed expected variance z_value p_value/]);
150             $href = $seq->stats_hash(values => {observed => 1, expected => 1, variance => 1, z_value => 1, p_value => 1});
151            
152             Returns a hashref with values for any of the methods for the specified Bistic (e.g., observed() value for runs). The named argument B is for an array-ref of stats that correspond to the method names for the given B (which is really a class name, e.g., runs, pot, for a Statistics::Sequences sub-module). The hash-reference of stat-values as keys (also shown in example above) is only in place for the purpose of setting optional args per value in a future version.
153            
154             Include other required or optional arguments relevant to any of the values requested, as defined in the sub-module manpages, e.g., B if getting a z_value, B and B if getting a p_value, B if testing pot, B if testing joins. The args B and B apply to all values, although the latter specifically applies to any C.
155            
156             =cut
157            
158             sub stats_hash {
159 0     0 1   my $self = shift;
160 0 0         my $args = ref $_[0] ? $_[0] : {@_};
161 0 0         croak 'No values requested to return in hash' if !ref $args->{'values'};
162 0           my @methods = ();
163 0 0         if ( ref $args->{'values'} eq 'ARRAY' ) {
164 0           @methods = @{ $args->{'values'} };
  0            
165             }
166             else { # assume hash
167             # later version might check for optional args per value, not just == 1:
168             @methods =
169 0           grep { $args->{'values'}->{$_} == 1 } keys %{ $args->{'values'} };
  0            
  0            
170             }
171            
172 0           my (%stats_hash) = ();
173 2     2   9 no strict 'refs';
  2         2  
  2         841  
174 0           foreach my $method (@methods) {
175 0           eval { $stats_hash{$method} = $self->$method($args); };
  0            
176 0 0         croak "Method <$method> is not defined or correctly called for "
177             . __PACKAGE__
178             if $@;
179             }
180 0           return \%stats_hash;
181             }
182            
183             =head2 dump
184            
185             $seq->dump(stat => 'runs|joins|pot ...', values => {}, format => 'string|labline|table', flag => '1|0', precision_s => 'integer', precision_p => 'integer');
186            
187             I: B
188            
189             Print results of the last-conducted test to STDOUT. By default, if no parameters to C are passed, a single line of test statistics is printed. Options are as follows.
190            
191             =over 8
192            
193             =item values => hashref
194            
195             Hashref of the statistical parameters to dump. Default is observed value and p-value for the given B.
196            
197             =item flag => I
198            
199             If true, the I

-value associated with the I-value is appended with a single asterisk if the value if below .05, and with two asterisks if it is below .01.

200            
201             If false (default), nothing is appended to the I

-value.

202            
203             =item format => 'table|labline|csv'
204            
205             Default is 'csv', to print the stats hash as a comma-separated string (no newline), e.g., '4.0000,0.8596800". If specifying 'labline', you get something like "observed = 4.0000, p_value = 0.8596800\n". If specifying "table", this is a dump from L with the stat methods as headers and column length set to the maximum required for the given headers, level of precision, flag, etc. For example, with B => 4 and B => 7, you get:
206            
207             .-----------+-----------.
208             | observed | p_value |
209             +-----------+-----------+
210             | 4.0000 | 0.8596800 |
211             '-----------+-----------'
212            
213             =item verbose => 1|0
214            
215             If true, includes a title giving the name of the statistic, details about the hypothesis tested (if B => 1 in the B hashref), et al. No effect if B is not defined or equals 'csv'.
216            
217             =item precision_s => 'I'
218            
219             Precision of the statistic values (observed, expected, variance, z_value).
220            
221             =item precision_p => 'I'
222            
223             Specify rounding of the probability associated with the I-value to so many digits. If zero or undefined, you get everything available.
224            
225             =back
226            
227             =cut
228            
229             sub dump {
230 0     0 1   my $self = shift;
231 0 0         my $args = ref $_[0] ? $_[0] : {@_};
232 0           my $stats_hash = $self->stats_hash($args);
233 0   0       $args->{'format'} ||= 'csv';
234 0           my @standard_methods =
235             (qw/observed expected variance obsdev stdev z_value psisq p_value/);
236 0           my ( $maxlen, @strs, @headers ) = (0);
237            
238             # set up what has been requested in a meaningful order:
239 0           my @wanted_methods = grep { defined $stats_hash->{$_} } @standard_methods;
  0            
240            
241             # add any extra "non-standard" methods
242 0           foreach my $method ( keys %{$stats_hash} ) {
  0            
243 0 0         if ( !grep /$method/, @wanted_methods ) {
244 0           push @wanted_methods, $method;
245             }
246             }
247            
248             # format each value for printing, adjusting its length if necessary:
249 0           foreach my $method (@wanted_methods) {
250 0           my $val = delete $stats_hash->{$method};
251 0           my $len;
252 0           ( $val, $len ) = _format_output_value( $val, $method, $args );
253 0           push @headers, $method;
254 0           push @strs, $val;
255 0 0         $len = length $val if !defined $len;
256 0 0         $maxlen = $len if $len > $maxlen;
257             }
258            
259 0 0         if ( $args->{'format'} eq 'table' ) {
    0          
260 0           _print_table( $maxlen, \@headers, \@strs, $args );
261             }
262             elsif ( $args->{'format'} eq 'labline' ) {
263 0           _print_labline( \@headers, \@strs, $args );
264             }
265             else { # csv
266 0 0         print join( q{,}, @strs ) or croak 'Cannot print data-string';
267             }
268 0           return;
269             }
270             *print_summary = \&dump;
271            
272             =head2 dump_data
273            
274             $seq->dump_data(delim => "\n");
275            
276             Prints to STDOUT a space-separated line of the tested data - as dichotomized and put to test. Optionally, give a value for B to specify how the datapoints should be separated. Inherited from L.
277            
278             =cut
279            
280             # PRIVATMETHODEN
281            
282             sub _feed {
283 0     0     my $method = shift;
284 0           my $self = shift;
285 0 0         my $args = ref $_[0] ? $_[0] : {@_};
286 0   0       my $statname = $args->{'stat'} || q{};
287 0           my $class = __PACKAGE__ . q{::} . ucfirst($statname);
288 0           eval {require $class};
  0            
289 0 0         if ($@) {
290 0           croak __PACKAGE__,
291             " error: Requested sequences module '$class' is not available";
292             }
293 0           my ( $val, $nself ) = ( q{}, {} );
294 0           bless( $nself, $class );
295 0           $nself->{$_} = $self->{$_} foreach keys %{$self};
  0            
296 2     2   19 no strict 'refs';
  2         2  
  2         1057  
297 0           eval {$val = $nself->$method($args)}
  0            
298             ; # but does not trap "deep recursion" if method not defined
299 0 0         if ($@) {
300 0           croak __PACKAGE__,
301             " error: Method '$method' is not defined or correctly called for $class";
302             }
303 0           return $val;
304             }
305            
306             sub _precisioned {
307 0     0     my ( $len, $val ) = @_;
308 0           my $nval;
309 0 0         if ( !defined $val ) {
    0          
    0          
310 0           $nval = q{};
311             }
312             elsif ( is_int($val) ) {
313 0           $nval = $val;
314             }
315             elsif ($len) { # don't lose any zero
316 0           $nval = sprintf( q{%.} . $len . q{f}, $val );
317             }
318             else {
319 0           $nval = $val;
320             }
321 0           return $nval;
322             }
323            
324             sub _format_output_value {
325 0     0     my ( $val, $method, $args ) = @_;
326 0           my $len;
327 0 0         if ( $method eq 'p_value' ) {
328 0           $val = _precisioned( $args->{'precision_p'}, $val );
329             $val .= ( $val < .05 ? ( $val < .01 ? q{**} : q{*} ) : q{} )
330 0 0         if $args->{'flag'};
    0          
    0          
331             }
332             else {
333 0 0         if ( ref $val ) {
    0          
334 0 0         if ( ref $val eq 'HASH' ) {
335 0           my %vals = %{$val};
  0            
336 0           $val = q{};
337 0 0         my $delim = $args->{'format'} eq 'table' ? "\n" : q{,};
338 0           my ( $str, $this_len ) = ();
339 0           while ( my ( $k, $v ) = each %vals ) {
340 0           $str = "'$k' = $v";
341 0           $len = max( length($str), $len );
342 0           $val .= $str . $delim;
343             }
344 0 0         if ( $args->{'format'} ne 'table' ) {
345 0           chop $val;
346 0           $val = '(' . $val . ')';
347             }
348             }
349             else {
350 0           $val = join q{, }, @{$val};
  0            
351             }
352             }
353             elsif ( looks_like_number($val) ) {
354 0           $val = _precisioned( $args->{'precision_s'}, $val );
355             }
356             }
357 0           return ( $val, $len );
358             }
359            
360             sub _print_table {
361 0     0     my ( $maxlen, $headers, $strs, $args ) = @_;
362 0 0         $maxlen = 8 if $maxlen < 8;
363             my $title =
364             $args->{'verbose'}
365 0 0         ? ucfirst( $args->{'stat'} ) . " statistics\n"
366             : q{};
367 0           my @hh = ();
368 0           push( @hh, [ $maxlen, $_ ] ) foreach @{$headers};
  0            
369 0           require Text::SimpleTable;
370 0           my $tbl = Text::SimpleTable->new(@hh);
371 0           $tbl->row( @{$strs} );
  0            
372 0 0         print $title or croak 'Cannot print table title';
373 0 0         print $tbl->draw or croak 'Cannot print data-table';
374 0           return;
375             }
376            
377             sub _print_labline {
378 0     0     my ( $headers, $strs, $args ) = @_;
379            
380 0           my @hh;
381 0           for my $i ( 0 .. ( scalar @{$strs} - 1 ) ) {
  0            
382 0           $hh[$i] = "$headers->[$i] = $strs->[$i]";
383             }
384 0           my $str = join( q{, }, @hh );
385 0 0         if ( $args->{'verbose'} ) {
386 0           $str = ucfirst( $args->{'stat'} ) . ': ' . $str;
387             }
388 0 0         print {*STDOUT} $str, "\n" or croak 'Cannot print data-string';
  0            
389 0           return;
390             }
391            
392             =head1 DIAGNOSTICS
393            
394             =over 8
395            
396             =item Requested sequences module '$class' is not available
397            
398             Croaked when any method is called that is not defined for the sub-module named as B.
399            
400             =item Method '$method' is not defined or correctly called for $class
401            
402             Method, like observed() called for a particular class (with the argument B in this parent module) might not exist, e.g., like 'kurtosis' among the 'pot' statistics; or the other arguments for the method are invalid, like calling them without any B.
403            
404             =item No values requested to return in hash
405            
406             Croaked from L, including va dump(), if array or hash ref named B is not given in the call.
407            
408             =item Cannot print data-string
409            
410             Courtesy of the dump() method; when trying to C a string as a single line or a table (via Text::SimpleTable's C).
411            
412             =back
413            
414             =head1 BUNDLING
415            
416             This module Cs its sub-modules implicitly - so a bundled program using this module might need to explicitly C its sub-modules if these need to be included in the bundle itself.
417            
418             =head1 AUTHOR
419            
420             Roderick Garton, C<< >>
421            
422             =head1 SUPPORT
423            
424             You can find documentation for this module with the perldoc command.
425            
426             perldoc Statistics::Sequences
427            
428             You can also look for information at:
429            
430             =over 4
431            
432             =item * RT: CPAN's request tracker (report bugs here)
433            
434             L
435            
436             =item * AnnoCPAN: Annotated CPAN documentation
437            
438             L
439            
440             =item * CPAN Ratings
441            
442             L
443            
444             =item * Search CPAN
445            
446             L
447            
448             =back
449            
450             =head1 LICENSE AND COPYRIGHT
451            
452             =over 4
453            
454             =item Copyright (c) 2006-2017 Roderick Garton
455            
456             This program is free software. It may be used, redistributed and/or modified under the same terms as Perl-5.6.1 (or later) (see L).
457            
458             =item Disclaimer
459            
460             To the maximum extent permitted by applicable law, the author of this module disclaims all warranties, either express or implied, including but not limited to implied warranties of merchantability and fitness for a particular purpose, with regard to the software and the accompanying documentation.
461            
462             =back
463            
464             =cut
465            
466             1; # end of Statistics::Sequences