File Coverage

blib/lib/Statistics/Sequences/Runs.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             package Statistics::Sequences::Runs;
2 1     1   17398 use 5.008008;
  1         3  
  1         31  
3 1     1   4 use strict;
  1         1  
  1         30  
4 1     1   3 use warnings FATAL => 'all';
  1         6  
  1         44  
5 1     1   4 use Carp qw(carp croak);
  1         1  
  1         70  
6 1     1   4 use base qw(Statistics::Sequences);
  1         1  
  1         367  
7 1     1   277 use List::AllUtils qw(mesh sum uniq);
  0            
  0            
8             use Number::Misc qw(is_even);
9             use Statistics::Zed 0.08;
10             $Statistics::Sequences::Runs::VERSION = '0.21';
11            
12             =pod
13            
14             =head1 NAME
15            
16             Statistics::Sequences::Runs - descriptives, deviation and combinatorial tests of Wald-Wolfowitz runs
17            
18             =head1 VERSION
19            
20             This is documentation for Version 0.21 of Statistics::Sequences::Runs.
21            
22             =head1 SYNOPSIS
23            
24             use strict;
25             use Statistics::Sequences::Runs 0.21; # not compatible with versions < .10
26             my $runs = Statistics::Sequences::Runs->new();
27            
28             # Data make up a dichotomous sequence:
29             my @data = (qw/1 0 0 0 1 1 0 1 1 0 0 1 0 0 1 1 1 1 0 1/);
30             my $val;
31            
32             # - Pre-load data to use for all methods:
33             $runs->load(\@data);
34             $val = $runs->observed();
35             $val = $runs->expected();
36            
37             # - or send data as "data => $aref" to each method:
38             $val = $runs->observed(data => \@data);
39            
40             # - or send frequencies of each of the 2 elements:
41             $val = $runs->expected(freqs => [11, 9]); # works with other methods except observed()
42            
43             # Deviation ratio:
44             $val = $runs->z_value(ccorr => 1);
45            
46             # Probability:
47             my ($z, $p) = $runs->z_value(ccorr => 1, tails => 1); # dev. ratio with p-value
48             $val = $runs->p_value(tails => 1); # normal dist. p-value itself
49             $val = $runs->p_value(exact => 1, tails => 1); # by combinatorics
50            
51             # Keyed list of descriptives etc.:
52             my $href = $runs->stats_hash(values => {observed => 1, p_value => 1}, exact => 1);
53            
54             # Print descriptives etc. in the same way:
55             $runs->dump(
56             values => {observed => 1, expected => 1, p_value => 1},
57             exact => 1,
58             flag => 1,
59             precision_s => 3,
60             precision_p => 7
61             );
62             # prints: observed = 11.000, expected = 10.900, p_value = 0.5700167
63            
64             =head1 DESCRIPTION
65            
66             The module returns statistical information re Wald-type runs: a sequence of events on 1 or more consecutive trials. For example, in a signal-detection test composed of match (H) and miss (M) events over time like H-H-M-H-M-M-M-M-H, there are 5 runs: 3 Hs, 2 Ms. This number of runs between two events can be compared with the number expected to occur by chance over the number of trials, relative to the expected variance (see REFERENCES). More runs than expected ("negative serial dependence") can denote irregularity, instability, mixing up of alternatives. Fewer runs than expected ("positive serial dependence") can denote cohesion, insulation, isolation of alternatives. Both can indicate sequential dependency: either negative (a bias to produce too many alternations), or positive (a bias to produce too many repetitions).
67            
68             The distribution of runs is asymptotically normal, and a deviation-based test of extra-chance occurrence when at least one alternative has more than 20 occurrences (Siegal rule), or both event occurrences exceed 10 (Kelly, 1982), is conventionally considered reliable; otherwise, the module provides an "exact test" option, based on combinatorics.
69            
70             Have non-dichotomous, continuous or multinomial data? See L for how to prepare them for test of runs.
71            
72             =head1 SUBROUTINES/METHODS
73            
74             =head2 Data-handling
75            
76             =head3 new
77            
78             $runs = Statistics::Sequences::Runs->new();
79            
80             Returns a new Runs object. Expects/accepts no arguments but the classname.
81            
82             =head3 load
83            
84             $runs->load(@data);
85             $runs->load(\@data);
86             $runs->load(foodat => \@data); # labelled whatever
87            
88             Loads data anonymously or by name - see L in the Statistics::Data manpage for details on the various ways data can be loaded and then retrieved (more than shown here).
89            
90             After the load, the data are L to ensure that they contain only two unique elements - if not, carp occurs and 0 rather than 1 is returned.
91            
92             Alternatively, skip this action; data don't always have to be loaded to use the stats methods here. To get the observed number of runs, data of course have to be loaded, but other stats can be got if given the observed count - otherwise, they too depend on data having been loaded.
93            
94             Every load unloads all previous loads and any additions to them.
95            
96             =head3 add, access, unload
97            
98             See L for these additional operations on data that have been loaded.
99            
100             =head2 Descriptives
101            
102             =head3 observed
103            
104             $v = $runs->observed(); # use the first data loaded anonymously
105             $v = $runs->observed(index => 1); # ... or give the required "index" for the loaded data
106             $v = $runs->observed(label => 'foodat'); # ... or its "label" value
107             $v = $runs->observed(data => [1, 0, 1, 1]); # ... or just give the data now
108            
109             Returns the total observed number of runs in the loaded or given data. For example,
110            
111             $v = $runs->observed_per_state(data => [qw/H H H T T H H/]);
112            
113             returns 3.
114            
115             I: runcount_observed, rco
116            
117             =cut
118            
119             sub observed {
120             my ( $self, @args ) = @_;
121             my $args = ref $args[0] ? $args[0] : {@args};
122             return $args->{'observed'} if defined $args->{'observed'};
123             my $data = _get_data( $self, $args );
124             my $rco = 1;
125             for ( 1 .. scalar @{$data} - 1 ) {
126             $rco++ if $data->[$_] ne $data->[ $_ - 1 ];
127             }
128             return $rco;
129             }
130             *runcount_observed = \&observed;
131             *rco = \&observed;
132            
133             =head3 observed_per_state
134            
135             @freq = $runs->observed_per_state(data => \@data);
136             $href = $runs->observed_per_state(data => \@data);
137            
138             Returns the number of runs per state - as an array where the first element gives the count for the first state in the data, and so for the second. A keyed hashref is returned if not called in array context. For example:
139            
140             @ari = $runs->observed_per_state(data => [qw/H H H T T H H/]); # returns (2, 1)
141             $ref = $runs->observed_per_state(data => [qw/H H H T T H H/]); # returns { H => 2, T => 1}
142            
143             =cut
144            
145             sub observed_per_state {
146             my ( $self, @args ) = @_;
147             my $args = ref $args[0] ? $args[0] : {@args};
148             my $data = _get_data( $self, $args );
149             my @states = uniq @{$data};
150             my @freqs = $data->[0] eq $states[0] ? ( 1, 0 ) : ( 0, 1 );
151             for ( 1 .. scalar @{$data} - 1 ) {
152             if ( $data->[$_] ne $data->[ $_ - 1 ] ) {
153             if ( $data->[$_] eq $states[0] ) {
154             $freqs[0]++;
155             }
156             else {
157             $freqs[1]++;
158             }
159             }
160             }
161             return wantarray ? @freqs : { mesh @states, @freqs };
162             }
163            
164             =head3 expected
165            
166             $v = $runs->expected(); # or specify loaded data by "index" or "label", or give it as "data" - see observed()
167             $v = $runs->expected(data => [qw/blah bing blah blah blah/]); # use these data
168             $v = $runs->expected(freqs => [12, 7]); # don't use actual data; calculate from these two Ns
169            
170             Returns the expected number of runs across the loaded data. Expectation is given as follows:
171            
172             =for html

  E[R] = ( (2n1n2) / (n1 + n2) ) + 1

173            
174             where I(I is the number of observations of each element in the data.
175            
176             I: runcount_expected, rce
177            
178             =cut
179            
180             sub expected {
181             my ( $self, @args ) = @_;
182             my $args = ref $args[0] ? $args[0] : {@args};
183             my ( $n1, $n2 ) = $self->bi_frequency($args);
184             my $sum = $n1 + $n2;
185             return $sum ? ( ( 2 * $n1 * $n2 ) / $sum ) + 1 : undef;
186             }
187             *rce = \&expected;
188             *runcount_expected = \&expected;
189            
190             =head3 variance
191            
192             $v = $runs->variance(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
193             $v = $runs->variance(data => [qw/blah bing blah blah blah/]); # use these data
194             $v = $runs->variance(freqs => [5, 12]); # use these trial numbers - not any particular sequence of data
195            
196             Returns the variance in the number of runs for the given data.
197            
198             =for html

  V[R] = ( (2n1n2)([2n1n2] – [n1 + n2]) ) / ( ((n1 + n2)2)((n1 + n2) – 1) )

199            
200             defined as above for L.
201            
202             The data to test can already have been Led, or you send it directly as a flat referenced array keyed as B.
203            
204             I: runcount_variance, rcv
205            
206             =cut
207            
208             sub variance {
209             my ( $self, @args ) = @_;
210             my $args = ref $args[0] ? $args[0] : {@args};
211             my ( $n1, $n2 ) = $self->bi_frequency($args);
212             my $sum = $n1 + $n2;
213             return $sum < 2
214             ? 1
215             : ( ( 2 * $n1 * $n2 * ( ( 2 * $n1 * $n2 ) - $sum ) ) /
216             ( ( $sum**2 ) * ( $sum - 1 ) ) );
217             }
218             *rcv = \&variance;
219             *runcount_variance = \&variance;
220            
221             =head3 observed_deviation
222            
223             $v = $runs->obsdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
224             $v = $runs->obsdev(data => [qw/blah bing blah blah blah/]); # use these data
225            
226             Returns the deviation of (difference between) observed and expected runs for the loaded/given sequence (I - I).
227            
228             I: obsdev
229            
230             =cut
231            
232             sub observed_deviation {
233             return observed(@_) - expected(@_);
234             }
235             *obsdev = \&observed_deviation;
236            
237             =head3 standard_deviation
238            
239             $v = $runs->stdev(); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
240             $v = $runs->stdev(data => [qw/blah bing blah blah blah/]);
241            
242             Returns square-root of the variance.
243            
244             I: stdev
245            
246             =cut
247            
248             sub standard_deviation {
249             return sqrt variance(@_);
250             }
251             *stdev = \&standard_deviation;
252            
253             =head3 skewness
254            
255             Returns run skewness as given by Barton & David (1958) based on the frequencies of the two different elements in the sequence.
256            
257             =cut
258            
259             sub skewness {
260             my ( $self, @args ) = @_;
261             my $args = ref $args[0] ? $args[0] : {@args};
262             my ( $n1, $n2 ) = $self->bi_frequency($args);
263             my $k3 = 0;
264             if ( $n1 != $n2 ) {
265             my $sum = $n1 + $n2;
266             $k3 =
267             ( ( 2 * $n1 * $n2 ) / $sum**3 ) *
268             ( ( ( 16 * $n1**2 * $n2**2 ) / $sum**2 ) -
269             ( ( 4 * $n1 * $n2 * ( $sum + 3 ) ) / $sum ) +
270             3 * $sum );
271             }
272             return $k3;
273             }
274            
275             =head3 kurtosis
276            
277             Returns run kurtosis as given by Barton & David (1958) based on the frequencies of the two different elements in the sequence.
278            
279             =cut
280            
281             sub kurtosis {
282             my ( $self, @args ) = @_;
283             my $args = ref $args[0] ? $args[0] : {@args};
284             my ( $n1, $n2 ) = $self->bi_frequency($args);
285             my $sum = $n1 + $n2;
286             my $k4 = ( ( 2 * $n1 * $n2 ) / $sum**4 ) * (
287             ( ( 48 * ( 5 * $sum - 6 ) * $n1**3 * $n2**3 ) / ( $sum**2 * $sum**2 ) )
288             - (
289             ( 48 * ( 2 * $sum**2 + 3 * $sum - 6 ) * $n1**2 * $n2**2 ) /
290             ( $sum**2 * $sum )
291             ) + (
292             ( 2 * ( 4 * $sum**3 + 45 * $sum**2 - 37 * $sum - 18 ) * $n1 * $n2 )
293             / $sum**2
294             ) - ( 7 * $sum**2 + 13 * $sum - 6 )
295             );
296             return $k4;
297             }
298            
299             =head2 Distribution and tests
300            
301             =head3 pmf
302            
303             $p = $runs->pmf(data => \@data); # or no args to use last pre-loaded data
304             $p = $runs->pmf(observed => 5, freqs => [5, 20]);
305            
306             Implements the runs probability mass function, returning the probability for a particular number of runs given so many dichotomous events (e.g., as in Swed & Eisenhart, 1943, p. 66); i.e., for I' the observed number of runs, I

{I = I'}. The required function parameters are the observed number of runs, and the frequencies (counts) of each state in the sequence, which can be given directly, as above, in the arguments B and B, respectively, or these will be worked out from a given data sequence itself (given here or as pre-loaded). For derivation, see its public internal methods L and L.

307            
308             =cut
309            
310             sub pmf {
311             my ( $self, @args ) = @_;
312             my $args = ref $args[0] ? $args[0] : {@args};
313             my ( $n1, $n2 ) = $self->bi_frequency($args);
314             my $u = $self->observed($args);
315             return _pmf_num( $u, $n1, $n2 ) / _pmf_denom( $n1, $n2 );
316             }
317            
318             =head3 cdf
319            
320             $p = $runs->cdf(data => \@data); # or no args to use last pre-loaded data
321             $p = $runs->cdf(observed => 5, freqs => [5, 20]);
322            
323             Implements the cumulative distribution function for runs, returning the probability of obtaining the observed number of runs or less down to the expected number of 2 (assuming that the two possible events are actually represented in the data), as per Swed & Eisenhart (1943), p. 66; i.e., for I' the observed number of runs, I

{I <= I'}. The summation is over the probability mass function L. The function parameters are the observed number of runs, and the frequencies (counts) of the two events, which can be given directly, as above, in the arguments B and B, respectively, or these will be worked out from a given data sequence itself (given here or as pre-loaded).

324            
325             =cut
326            
327             sub cdf {
328             my ( $self, @args ) = @_;
329             my $args = ref $args[0] ? $args[0] : {@args};
330             my ( $n1, $n2 ) = $self->bi_frequency($args);
331             my $u = $self->observed($args);
332             my $sum = 0;
333             for ( 2 .. $u ) {
334             $sum += _pmf_num( $_, $n1, $n2 );
335             }
336             return $sum / _pmf_denom( $n1, $n2 );
337             }
338            
339             =head3 cdfi
340            
341             $p = $runs->cdfi(data => \@data); # or no args for last pre-loaded data
342             $p = $runs->cdfi(observed => 11, freqs => [5, 11]);
343            
344             Implements the (inverse) cumulative distribution function for runs, returning the probability of obtaining more than the observed number of runs up from the expected number of 2 (assuming that the two possible events are actually represented in the data), as per Swed & Eisenhart (1943), p. 66; ; i.e., for I' the observed number of runs, I

= 1 - I

{I <= I' - 1}. The summation is over the probability mass function L. The function parameters are the observed number of runs, and the frequencies (counts) of the two events, which can be given directly, as above, in the arguments B and B, respectively, or these will be worked out from a given data sequence itself (given here or as pre-loaded).

345            
346             =cut
347            
348             sub cdfi {
349             my ( $self, @args ) = @_;
350             my $args = ref $args[0] ? $args[0] : {@args};
351             my ( $n1, $n2 ) = $self->bi_frequency($args);
352             my $u = $self->observed($args);
353             my $sum = 0;
354             for ( 2 .. $u - 1 ) {
355             $sum += _pmf_num( $_, $n1, $n2 );
356             }
357             return 1 - $sum / _pmf_denom( $n1, $n2 );
358             }
359            
360             =head3 z_value
361            
362             $v = $runs->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "label" or "index" - see observed()
363             $v = $runs->z_value(data => $aref, ccorr => 1);
364             ($zvalue, $pvalue) = $runs->z_value(data => $aref, ccorr => 1, tails => 2); # same but wanting an array, get the p-value too
365            
366             Returns the zscore from a test of runcount deviation, taking the runcount expected away from that observed and dividing by the root expected runcount variance, by default with a continuity correction to expectation. Called wanting an array, returns the z-value with its I

-value for the tails (1 or 2) given.

367            
368             The data to test can already have been Led, or sent directly as an aref keyed as B.
369            
370             Other options are B (for the z_value) and B (for the p_value).
371            
372             I: runcount_zscore, rzs, zscore
373            
374             =cut
375            
376             sub z_value {
377             my ( $self, @args ) = @_;
378             my $args = ref $args[0] ? $args[0] : {@args};
379             my $zed = Statistics::Zed->new();
380             my ( $zval, $pval ) = $zed->zscore(
381             observed => $self->rco($args),
382             expected => $self->rce($args),
383             variance => $self->rcv($args),
384             ccorr => ( defined $args->{'ccorr'} ? $args->{'ccorr'} : 1 ),
385             tails => ( $args->{'tails'} || 2 ),
386             precision_s => $args->{'precision_s'},
387             precision_p => $args->{'precision_p'},
388             );
389             return wantarray ? ( $zval, $pval ) : $zval;
390             }
391             *rzs = \&z_value;
392             *runcount_zscore = \&z_value;
393             *zscore = \&z_value;
394            
395             =head3 p_value
396            
397             $p = $runs->p_value(); # using loaded data and default args
398             $p = $runs->p_value(ccorr => 0|1, tails => 1|2); # normal-approx. for last-loaded data
399             $p = $runs->p_value(exact => 1); # calc combinatorially for observed >= or < than expectation
400             $p = $runs->p_value(data => [1, 0, 1, 1, 0], exact => 1); # given data
401             $p = $runs->p_value(freqs => [12, 12], observed => 8); # no data sequence, specify known params
402            
403             Returns the probability of getting the observed number of runs or a smaller number given the number of each of the two events. By default, a large sample is assumed, and the probability is obtained from the normalized deviation, as given by the L method.
404            
405             If the option B is defined and not zero, then the probability is worked out combinatorially, as per Swed & Eisenhart (1943), Eq. 1, p. 66 (see also Siegal, 1956, Eqs. 6.12a and 6.12b, p. 138). By default, this is a one-tailed test, testing the hypotheses that there are either too many or too few runs relative to chance expectation; the "correct" hypothesis is tested based on the expected value returned by the L method. Setting B => 2 simply doubles the one-tailed I

-value from any of these tests. Output from these tests has been checked against the tables and examples in Swed & Eisenhart (given to 7 decimal places), and found to agree.

406            
407             The option B gives the returned I

-value to so many decimal places.

408            
409             I: test, runs_test, rct
410            
411             =cut
412            
413             sub p_value {
414             my ( $self, @args ) = @_;
415             my $args = ref $args[0] ? $args[0] : {@args};
416             my $pval;
417             if ( $args->{'exact'} ) {
418             $pval =
419             ( $self->observed($args) - $self->rce($args) >= 0 )
420             ? $self->cdfi($args)
421             : $self->cdf($args);
422             $pval *= 2 if $args->{'tails'} and $args->{'tails'} == 2;
423             $pval = sprintf( q{%.} . $args->{'precision_p'} . 'f', $pval )
424             if $args->{'precision_p'};
425             }
426             else {
427             $pval = ( $self->zscore($args) )[1];
428             }
429             return $pval;
430             }
431             *test = \&p_value;
432             *runs_test = \*p_value;
433             *rct = \*p_value;
434            
435             =head3 lrx2
436            
437             Likelihood ratio chi-square test for runs by length.
438            
439             =cut
440            
441             sub lrx2 {
442             my ( $self, @args ) = @_;
443             my $args = ref $args[0] ? $args[0] : {@args};
444             my ( $n1, $n2 ) = $self->bi_frequency($args);
445             return;
446             }
447            
448             =head3 ztest_ok
449            
450             Returns true for the loaded sequence if its constituent sample numbers are sufficient for their expected runs to be normally approximated - using Siegal's (1956, p. 140) rule - ok if I of the two Is are greater than 20.
451            
452             =cut
453            
454             sub ztest_ok {
455             my ( $self, @args ) = @_;
456             my $args = ref $args[0] ? $args[0] : {@args};
457             my ( $n1, $n2 ) = $self->bi_frequency($args);
458             my $retval =
459             $n1 > 20 || $n2 > 20
460             ? 1
461             : 0
462             ; # Siegal's rule (p. 140) - ok if either of the two Ns are greater than 20
463             return $retval;
464             }
465            
466             =head2 Utils
467            
468             Methods used internally, or for returning/printing descriptives, etc., in a bunch.
469            
470             =head3 bi_frequency
471            
472             @freq = $runs->bi_frequency(data => \@data); # or no args if using last pre-loaded data
473            
474             Returns frequency of the two elements - or croaks if there are more than 2, and gives zero for any absent.
475            
476             =cut
477            
478             sub bi_frequency {
479             my ( $self, @args ) = @_;
480             my $args = ref $args[0] ? $args[0] : {@args};
481             carp
482             'Argument named \'trials\' is deprecated; use \'freqs\' to give aref of frequencies per state'
483             if $args->{'trials'};
484             return @{ $args->{'freqs'} } if ref $args->{'freqs'};
485             my $data = _get_data( $self, $args );
486             my %states = ();
487             $states{$_}++ for @{$data}; # hash keying each element with its frequency
488             croak 'Cannot compute runs: More than two states were found in the data: '
489             . join( q{, }, keys %states )
490             if scalar keys %states > 2;
491             my @vals = values %states;
492             $vals[1] = 0 if scalar @vals < 2;
493             return @vals;
494             }
495            
496             =head3 n_max_seq
497            
498             $n = $runs->n_max_seq(); # loaded data
499             $n = $runs->n_max_seq(data => \@data); # this sequence
500             $n = $runs->n_max_seq(observed => int, freqs => [int, int]); # these specs
501            
502             Returns the number of possible sequences for the two given state frequencies. So the proverbial urn contains I1 black balls and I2 white balls, well mixed, and take I1 + I2 drawings from it without replacement, so any sequence has the same probability of occurring; how many different sequences of black and white balls are possible? For the two counts, this is "sum of I1 + I2 I I1", or:
503            
504             =for html

   Nmax = ( N1 + N2 )! / N1!N2!

505            
506             With the usual definition of a probability as M / N, this is the denominator term in the runs L. This does not take into account the probability of obtaining so many of each event, of the proportion of black and white balls in the urn. (That's work and play for another day.)
507            
508             =cut
509            
510             sub n_max_seq {
511             my ( $self, @args ) = @_;
512             my $args = ref $args[0] ? $args[0] : {@args};
513             my @freqs = $self->bi_frequency($args);
514             return _pmf_denom(@freqs);
515             }
516            
517             =head3 m_seq_k
518            
519             $n = $runs->m_seq_k(); # loaded data
520             $n = $runs->m_seq_k(data => \@data); # this sequence
521             $n = $runs->m_seq_k(observed => int, freqs => [int, int]); # these specs
522            
523             Returns the number of sequences that can produce I runs from I elements of a single kind, with all other kinds of elements in the sequence assumed to be of a single kind, under the conditions of L. See Swed and Eisenhart (1943), or barton and David (1958, p. 253). With the usual definition of a probability as M / N, this is the numerator term in the runs L.
524            
525             =cut
526            
527             sub m_seq_k {
528             my ( $self, @args ) = @_;
529             my $args = ref $args[0] ? $args[0] : {@args};
530             my $u = $self->observed($args);
531             my @freqs = $self->bi_frequency($args);
532             return _pmf_num( $u, @freqs );
533             }
534            
535             =head3 stats_hash
536            
537             $href = $runs->stats_hash(values => {observed => 1, expected => 1, variance => 1, z_value => 1, p_value => 1}, exact => 0, ccorr => 1);
538            
539             Returns a hashref for the counts and stats as specified in its "values" argument, and with any options for calculating them (e.g., exact for p_value). See L for details. If calling via a "runs" object, the option "stat => 'runs'" is not needed (unlike when using the parent "sequences" object).
540            
541             =head3 dump
542            
543             $runs->dump(values => { observed => 1, variance => 1, p_value => 1}, exact => 1, flag => 1, precision_s => 3); # among other options
544            
545             Print Runs-test results to STDOUT. See L for details of what stats to dump (default is observed() and p_value()). Optionally also give the data directly.
546            
547             =cut
548            
549             sub dump {
550             my ( $self, @args ) = @_;
551             my $args = ref $args[0] ? $args[0] : {@args};
552             $args->{'stat'} = 'runs';
553             $self->SUPER::dump($args);
554             return;
555             }
556            
557             =head3 dump_data
558            
559             $runs->dump_data(delim => "\n"); # print whatevers loaded (or specify by label, index, or as "data")
560            
561             See L for details.
562            
563             =cut
564            
565             # Private methods:
566            
567             sub _pmf_num {
568             my ( $u, $m, $n ) = @_;
569             my $f;
570             if ( is_even($u) ) {
571             my $k = $u / 2 - 1;
572             $f = 2 * _choose( $m - 1, $k ) * _choose( $n - 1, $k );
573             }
574             else {
575             my $k = ( $u + 1 ) / 2;
576             $f =
577             _choose( $m - 1, $k - 1 ) * _choose( $n - 1, $k - 2 ) +
578             _choose( $m - 1, $k - 2 ) * _choose( $n - 1, $k - 1 );
579             }
580             return $f;
581             }
582            
583             sub _pmf_denom {
584             return _choose( sum(@_), $_[0] );
585            
586             #return _factorial(sum(@_)) / ( _factorial($_[0]) * _factorial($_[1]) );
587             }
588            
589             sub _choose { # from Orwant et al., p. 573
590             my ( $n, $k ) = @_;
591             my ( $res, $j ) = ( 1, 1 );
592             return 0 if $k > $n || $k < 0;
593             $k = ( $n - $k ) if ( $n - $k ) < $k;
594             while ( $j <= $k ) {
595             $res *= $n--;
596             $res /= $j++;
597             }
598             return $res;
599             }
600            
601             sub _factorial {
602             my ( $n, $res ) = ( shift, 1 );
603             return undef unless $n >= 0 and $n == int($n);
604             $res *= $n-- while $n > 1;
605             return $res;
606             }
607            
608             sub _get_data {
609             my ( $self, $args ) = @_;
610             return ref $args->{'data'} ? $args->{'data'} : $self->read($args);
611             }
612            
613             1;
614            
615             __END__