File Coverage

blib/lib/Statistics/ANOVA/Page.pm
Criterion Covered Total %
statement 75 88 85.2
branch 5 10 50.0
condition 2 5 40.0
subroutine 19 22 86.3
pod 7 7 100.0
total 108 132 81.8


line stmt bran cond sub pod time code
1             package Statistics::ANOVA::Page;
2            
3 2     2   27029 use 5.006;
  2         4  
4 2     2   6 use strict;
  2         2  
  2         41  
5 2     2   10 use warnings FATAL => 'all';
  2         4  
  2         76  
6 2     2   6 use base qw(Statistics::Data);
  2         2  
  2         1058  
7 2     2   44439 use Carp qw(croak);
  2         5  
  2         103  
8 2     2   8 use List::AllUtils qw(sum0);
  2         2  
  2         67  
9 2     2   1899 use Math::Cephes qw(:dists);
  2         8724  
  2         500  
10 2     2   831 use Statistics::Data::Rank;
  2         5020  
  2         51  
11 2     2   941 use Statistics::Zed;
  2         3530  
  2         1185  
12             $Statistics::ANOVA::Page::VERSION = '0.02';
13            
14             =head1 NAME
15            
16             Statistics::ANOVA::Page - Nonparametric analysis of variance by ranks for trend across repeatedly measured variables (Page and sign tests).
17            
18             =head1 VERSION
19            
20             This is documentation for B of Statistics::ANOVA::Page.
21            
22             =head1 SYNOPSIS
23            
24             use Statistics::ANOVA::Page;
25             my $page = Statistics::ANOVA::Page->new();
26             $page->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11, 16]}); # note ordinal datanames
27             my $l_value = $page->observed(); # or expected(), variance()
28             my ($z_value, $p_value) = $page->zprob_test(ccorr => 2, tails => 1);
29             # or without pre-loading:
30             $l_value = $page->observed(data => {1 => [2, 4, 6], 2 => [5, 3, 12]});
31             # or for subset of loaded data:
32             $l_value = $page->observed(lab => [1, 3]);
33            
34             =head1 DESCRIPTION
35            
36             Calculates Page statistics for nonparametric analysis of variance across given orders of repeatedly measured variables. Ranks are computed exactly as for the L test, but the ranks are weighted according to the ordinal position of the group/level to which they pertain. Also, the test of significance is based on a standardized value, with the I

-value read off the normal distribution. Similarly to the relationship between the Kruskal-Wallis and L tests for non-dependent observations, the Friedman test returns the same value regardless of the ordinality of the variables as levels, but the Page test is of ranks in association with the ordinality of the variables (as levels rather than groups). These are weighted according to their Perl sort { $a <=> $b} order, so they should have sort-able names that reflect the ordering of the variables.

37            
38             With only two groups, the test statistic is equivalent to that provided by a B.
39            
40             Build tests include comparison of return values with published data, viz. from Hollander and Wolfe (1999, p. 286ff); passing these tests means the results agree.
41            
42             =head1 SUBROUTINES/METHODS
43            
44             =head2 new
45            
46             $page = Statistics::ANOVA::Page->new();
47            
48             New object for accessing methods and storing results. This "isa" Statistics::Data object.
49            
50             =head2 load, add, unload
51            
52             $page->load(1 => [1, 4], 2 => [3, 7]);
53            
54             The given data can now be used by any of the following methods. This is inherited from L, and all its other methods are available here via the class object. Only passing of data as a hash of arrays (HOA) is supported for now. Alternatively, give each of the following methods the HOA for the optional named argument B.
55            
56             =head2 observed
57            
58             $val = $page->observed(); # data pre-loaded
59             $val = $page->observed(data => $hashref_of_arefs);
60            
61             Returns the observed statistic I based on within-group rankings of the data weighted according to the ordinal position of the variable (by its numerical name) to which they pertain.
62            
63             Optionally, if the data have not been pre-loaded, send as named argument B.
64            
65             =cut
66            
67             sub observed {
68 1     1 1 489 my ( $self, %args ) = @_;
69 1         4 return _calc_l_value( _get_data( $self, %args ) );
70             }
71            
72             =head2 observed_r
73            
74             $val = $page->observed_r(); # data pre-loaded
75             $val = $page->observed_r(data => $hashref_of_arefs);
76            
77             This implements a "l2r" transformation: Hollander and Wolfe (1999) describe how Page's I-statistic is directly related to Spearman's rank-order correlation coefficient (see L), based on the observed and predicted order of each associated group/level per observation.
78            
79             =cut
80            
81             sub observed_r {
82 0     0 1 0 my ( $self, %args ) = @_;
83 0         0 return _calc_l2r_value( _get_data( $self, %args ) );
84             }
85            
86             =head2 expected
87            
88             $val = $page->expected(); # data pre-loaded
89             $val = $page->expected(data => $hashref_of_arefs);
90            
91             Returns the expected value of the I statistic for the given data.
92            
93             =cut
94            
95             sub expected {
96 1     1 1 7 my ( $self, %args ) = @_;
97 1         2 return _calc_l_exp( _get_data( $self, %args ) );
98             }
99            
100             =head2 variance
101            
102             $val = $page->variance(); # data pre-loaded
103             $val = $page->variance(data => $hashref_of_arefs);
104            
105             Return the variance expected to occur in the I values for the given data.
106            
107             =cut
108            
109             sub variance {
110 1     1 1 5 my ( $self, %args ) = @_;
111 1         3 return _calc_l_var( _get_data( $self, %args ) );
112             }
113            
114             =head2 zprob_test
115            
116             $p_val = $page->zprob_test(); # data pre-loaded
117             $p_val = $page->zprob_test(data => $hashref_of_arefs);
118             ($z_val, $p_val) = $page->zprob_test(); # get z-score too
119            
120             Calculates an expected I value and variance, to provide a normalized I for which the I

-value is read off the normal distribution. This is appropriate for "large" samples. Optional arguments are B and B as in L.

121            
122             =cut
123            
124             sub zprob_test {
125 1     1 1 5 my ( $self, %args ) = @_;
126 1         16 my $href = _get_data( $self, %args );
127 1         6 my $zed = Statistics::Zed->new();
128 1         20 my ( $z_value, $p_value ) = $zed->z_value(
129             observed => _calc_l_value($href),
130             expected => _calc_l_exp($href),
131             variance => _calc_l_var($href),
132             %args
133             );
134 1 50       149 return wantarray ? ( $z_value, $p_value ) : $p_value;
135             }
136            
137             =head2 chiprob_test
138            
139             $p_val = $page->chiprob_test(); # data pre-loaded
140             $p_val = $page->chiprob_test(data => $hashref_of_arefs);
141             ($chi_val, $df, $num, $p_val) = $page->chiprob_test();
142            
143             Calculates a chi-square statistic based on the observed value of I, the number of ranked variables, and the number of replications; as per Page(1963, Eq. 4). This is a two-tailed test; if the optional argument B => 1, the returned probability, read off the chi-square distribution, is halved. Called in scalar context, returns the I

-value alone. Called in list context, returns the chi-square value, the degrees-of-freedom, number of observations, and then the I

-value.

144            
145             =cut
146            
147             sub chiprob_test {
148 1     1 1 778 my ( $self, %args ) = @_;
149 1         2 my $data_href = _get_data( $self, %args );
150 1         3 my $l = _calc_l_value($data_href);
151 1         3 my $n_bt = scalar keys %{$data_href};
  1         2  
152 1         9 my $n_wt = __PACKAGE__->equal_n( data => $data_href );
153 1         20 my $num = ( ( 12 * $l ) - ( 3 * $n_wt * $n_bt ) * ( $n_bt + 1 )**2 )**2;
154 1         2 my $den = ( ( $n_wt * $n_bt**2 ) * ( $n_bt**2 - 1 ) * ( $n_bt + 1 ) );
155 1 50       3 croak 'Chi-square probability test not available given limited number of observations' if ! $den;
156 1         2 my $chi = $num / $den;
157 1         17 my $p_value = _set_tails( chdtrc( 1, $chi ), $args{'tails'} ); # Math::Cephes fn
158 1 50       4 return wantarray ? ( $chi, 1, ( $n_bt * $n_wt ), $p_value ) : $p_value;
159             }
160            
161             =head2 chiprob_str
162            
163             $str = $page->chiprob_str(data => HOA, correct_ties => 1);
164            
165             Performs L and returns a string of the conventional reporting form, e.g., chi^2(df, N = total observations) = chi_value, p = p_value.
166            
167             =cut
168            
169             sub chiprob_str {
170 0     0 1 0 my ( $self, %args ) = @_;
171 0         0 my ( $chi_value, $df, $count, $p_value ) = $self->chiprob_test(%args);
172 0         0 return "chi^2($df, N = $count) = $chi_value, p = $p_value";
173             }
174            
175             sub _calc_l_value {
176 3     3   3 my $data = shift;
177 3         12 my $ranks = Statistics::Data::Rank->sum_of_ranks_within( data => $data );
178 3         1069 my $c = 0;
179             return sum0(
180 15         19 map { ++$c * $ranks->{$_} }
181 3         4 sort { $a <=> $b } keys %{$ranks}
  23         18  
  3         8  
182             );
183             }
184            
185             sub _calc_l2r_value {
186 0     0   0 my $data = shift;
187 0         0 my $l = _calc_l_value($data);
188 0         0 my $n_bt = scalar keys %{$data};
  0         0  
189 0         0 my $n_wt = __PACKAGE__->equal_n( data => $data );
190 0         0 return ( ( ( 12 * $l ) / ( $n_wt * $n_bt * ( $n_bt**2 - 1 ) ) ) -
191             ( ( 3 * ( $n_bt + 1 ) ) / ( $n_bt - 1 ) ) );
192             }
193            
194             sub _calc_l_exp {
195 2     2   2 my $data = shift;
196 2         2 my $n_bt = scalar keys %{$data};
  2         3  
197 2         7 my $n_wt = __PACKAGE__->equal_n( data => $data );
198 2         43 return ( $n_wt * $n_bt * ( $n_bt + 1 )**2 ) / 4;
199             }
200            
201             sub _calc_l_var {
202 2     2   1 my $data = shift;
203 2         2 my $n_bt = scalar keys %{$data};
  2         4  
204 2         5 my $n_wt = __PACKAGE__->equal_n( data => $data );
205 2         36 return ( $n_wt * $n_bt**2 * ( $n_bt + 1 ) * ( $n_bt**2 - 1 ) ) / 144;
206             }
207            
208             sub _set_tails {
209 1     1   2 my ($p_value, $tails) = @_;
210 1   50     5 $tails ||= 2;
211 1 50 33     7 if (defined $tails and $tails == 1) {
212 0         0 $p_value /= 2;
213             }
214 1         2 return $p_value;
215             }
216            
217             sub _get_data {
218 5     5   4 my ( $self, %args ) = @_;
219 5         5 my $hoa;
220 5 50       8 if ( ref $args{'data'} ) {
221 0         0 $hoa = delete $args{'data'};
222             }
223             else {
224 5         15 $hoa = $self->get_hoa_by_lab(%args);
225             }
226 5         240 return $hoa;
227             }
228            
229             =head1 DIAGNOSTICS
230            
231             =over 4
232            
233             =item Chi-square probability test not available given limited number of observations
234            
235             Ced if the denominator value to calculate the chi-square statistic is not defined or zero, which would arise if there is only one between-group level, or zero observations within the levels.
236            
237             =item Equal number of observations required for calculating ranks within groups
238            
239             Ced via Statistics::Data::Rank *_ranks_within methods given that they need to have the same number of observations per group; as when the different factor levels are repeatedly measured on the same replicants. Most methods require this to be the case.
240            
241             =back
242            
243             =head1 REFERENCES
244            
245             Hollander, M., & Wolfe, D. A. (1999). I. New York, NY, US: Wiley.
246            
247             Page, E. B. (1963). Ordered hypotheses for multiple treatments: A significance test for linear ranks. I, I<58>, 216-230. doi: L<10.1080/01621459.1963.10500843|http://dx.doi.org/10.1080/01621459.1963.10500843>. [L]
248            
249             =head1 DEPENDENCIES
250            
251             L : provides the handy sum0() function
252            
253             L : used for probability functions.
254            
255             L : used as a C for caching and retrieving data.
256            
257             L : used to implement cross-case ranking.
258            
259             L : for z-testing with optional continuity correction and tailing.
260            
261             =head1 AUTHOR
262            
263             Roderick Garton, C<< >>
264            
265             =head1 BUGS AND LIMITATIONS
266            
267             Please report any bugs or feature requests to C, or through
268             the web interface at L. I will be notified, and then you'll
269             automatically be notified of progress on your bug as I make changes.
270            
271             =head1 SUPPORT
272            
273             You can find documentation for this module with the perldoc command.
274            
275             perldoc Statistics::ANOVA::Page
276            
277            
278             You can also look for information at:
279            
280             =over 4
281            
282             =item * RT: CPAN's request tracker (report bugs here)
283            
284             L
285            
286             =item * AnnoCPAN: Annotated CPAN documentation
287            
288             L
289            
290             =item * CPAN Ratings
291            
292             L
293            
294             =item * Search CPAN
295            
296             L
297            
298             =back
299            
300            
301             =head1 ACKNOWLEDGEMENTS
302            
303            
304             =head1 LICENSE AND COPYRIGHT
305            
306             Copyright 2015-2017 Roderick Garton.
307            
308             This program is free software; you can redistribute it and/or modify it
309             under the terms of either: the GNU General Public License as published
310             by the Free Software Foundation; or the Artistic License.
311            
312             See L for more information.
313            
314            
315             =cut
316            
317             1; # End of Statistics::ANOVA::Page