File Coverage

blib/lib/Statistics/ANOVA/Page.pm
Criterion Covered Total %
statement 69 81 85.1
branch 4 8 50.0
condition 1 2 50.0
subroutine 17 20 85.0
pod 7 7 100.0
total 98 118 83.0


line stmt bran cond sub pod time code
1             package Statistics::ANOVA::Page;
2            
3 2     2   33190 use 5.006;
  2         5  
  2         80  
4 2     2   9 use strict;
  2         2  
  2         65  
5 2     2   6 use warnings FATAL => 'all';
  2         8  
  2         77  
6 2     2   12 use base qw(Statistics::Data);
  2         1  
  2         1188  
7 2     2   50727 use List::AllUtils qw(sum0);
  2         4  
  2         111  
8 2     2   1199 use Math::Cephes qw(:dists);
  2         10950  
  2         711  
9 2     2   1066 use Statistics::Data::Rank;
  2         6080  
  2         55  
10 2     2   1063 use Statistics::Zed;
  2         4309  
  2         1418  
11            
12             =head1 NAME
13            
14             Statistics::ANOVA::Page - Nonparametric analysis of variance by ranks for trend across dependent variables (Page and sign tests).
15            
16             =head1 VERSION
17            
18             Version 0.01
19            
20             =cut
21            
22             our $VERSION = '0.01';
23            
24             =head1 SYNOPSIS
25            
26             use Statistics::ANOVA::Page;
27             my $page = Statistics::ANOVA::Page->new();
28             $page->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11, 16]}); # note ordinal datanames
29             my $l_value = $page->observed(); # or expected(), variance()
30             my ($z_value, $p_value) = $page->zprob_test(ccorr => 2, tails => 1);
31             # or without pre-loading:
32             $l_value = $page->observed(data => {1 => [2, 4, 6], 2 => [5, 3, 12]});
33             # or for subset of loaded data:
34             $l_value = $page->observed(lab => [1, 3]);
35            
36             =head1 DESCRIPTION
37            
38             Calculates Page statistics for nonparametric analysis of variance across given orders of dependent variables. Ranks are computed exactly as for the L test, but the ranks are weighted according to the ordinal position of the group/level to which they pertain. Also, the test of significance is based on a standardized value, with the I

-value read off the normal distribution. Similarly to the relationship between the Kruskal-Wallis and L tests for non-dependent observations, the Friedman test returns the same value regardless of the ordinality of the variables as levels, but the Page test tests ranks in association with the ordinality of the variables (as levels rather than groups). These are weighted according to their Perl sort { $a <=> $b} order, so they should have sort-able names that reflect the ordering of the variables.

39            
40             With only two groups, the test statistic is equivalent to that provided by a B.
41            
42             Build tests include comparison of return values with published data, viz. from Hollander and Wolfe (1999, p. 286ff); passing these tests means the results agree.
43            
44             =head1 SUBROUTINES/METHODS
45            
46             =head2 new
47            
48             $page = Statistics::ANOVA::Page->new();
49            
50             New object for accessing methods and storing results. This "isa" Statistics::Data object.
51            
52             =head2 load, add, unload
53            
54             $page->load(1 => [1, 4], 2 => [3, 7]);
55            
56             The given data can now be used by any of the following methods. This is inherited from L, and all its other methods are available here via the class object. Only passing of data as a hash of arrays (HOA) is supported for now. Alternatively, give each of the following methods the HOA for the optional named argument B.
57            
58             =head2 observed
59            
60             $val = $page->observed(); # data pre-loaded
61             $val = $page->observed(data => $hashref_of_arefs);
62            
63             Returns the observed statistic I based on within-group rankings of the data weighted according to the ordinal position of the variable (by its numerical name) to which they pertain.
64            
65             Optionally, if the data have not been pre-loaded, send as named argument B.
66            
67             =cut
68            
69             sub observed {
70 1     1 1 802 my ( $self, %args ) = @_;
71 1         5 return _calc_l_value( _get_data( $self, %args ) );
72             }
73            
74             =head2 observed_r
75            
76             $val = $page->observed_r(); # data pre-loaded
77             $val = $page->observed_r(data => $hashref_of_arefs);
78            
79             This implements a "l2r" transformation: Hollander and Wolfe (1999) describe how Page's I-statistic is directly related to Spearman's rank-order correlation coefficient (see L), based on the observed and predicted order of each associated group/level per observation.
80            
81             =cut
82            
83             sub observed_r {
84 0     0 1 0 my ( $self, %args ) = @_;
85 0         0 return _calc_l2r_value( _get_data( $self, %args ) );
86             }
87            
88             =head2 expected
89            
90             $val = $page->expected(); # data pre-loaded
91             $val = $page->expected(data => $hashref_of_arefs);
92            
93             Returns the expected value of the I statistic for the given data.
94            
95             =cut
96            
97             sub expected {
98 1     1 1 7 my ( $self, %args ) = @_;
99 1         3 return _calc_l_exp( _get_data( $self, %args ) );
100             }
101            
102             =head2 variance
103            
104             $val = $page->variance(); # data pre-loaded
105             $val = $page->variance(data => $hashref_of_arefs);
106            
107             Return the variance expected to occur in the I values for the given data.
108            
109             =cut
110            
111             sub variance {
112 1     1 1 5 my ( $self, %args ) = @_;
113 1         2 return _calc_l_var( _get_data( $self, %args ) );
114             }
115            
116             =head2 zprob_test
117            
118             $p_val = $page->zprob_test(); # data pre-loaded
119             $p_val = $page->zprob_test(data => $hashref_of_arefs);
120             ($z_val, $p_val) = $page->zprob_test(); # get z-score too
121            
122             Calculates an expected I value and variance, to provide a normalized I for which the I

-value is read off the normal distribution. This is appropriate for "large" samples. Optional arguments are B and B as in L.

123            
124             =cut
125            
126             sub zprob_test {
127 1     1 1 5 my ( $self, %args ) = @_;
128 1         4 my $href = _get_data( $self, %args );
129 1         6 my $zed = Statistics::Zed->new();
130 1         24 my ( $z_value, $p_value ) = $zed->z_value(
131             observed => _calc_l_value($href),
132             expected => _calc_l_exp($href),
133             variance => _calc_l_var($href),
134             %args
135             );
136 1 50       173 return wantarray ? ( $z_value, $p_value ) : $p_value;
137             }
138            
139             =head2 chiprob_test
140            
141             $p_val = $page->chiprob_test(); # data pre-loaded
142             $p_val = $page->chiprob_test(data => $hashref_of_arefs);
143             ($chi_val, $p_val) = $page->chiprob_test(); # get z-score too
144            
145             Calculates a chi-square statistic based on the observed value of , the number of ranked variables, and the number of replications; as per Page(1963, Eq. 4). This is a two-tailed test; if the optional argument B => 1, the returned probability, read off the chi-square distribution, is halved.
146            
147             =cut
148            
149             sub chiprob_test {
150 1     1 1 1703 my ( $self, %args ) = @_;
151 1         4 my $data_href = _get_data( $self, %args );
152 1         3 my $l = _calc_l_value($data_href);
153 1         2 my $n_bt = scalar keys %{$data_href};
  1         2  
154 1         4 my $n_wt = __PACKAGE__->equal_n( data => $data_href );
155 1         22 my $num = ( ( 12 * $l ) - ( 3 * $n_wt * $n_bt ) * ( $n_bt + 1 )**2 )**2;
156 1         3 my $chi =
157             $num / ( ( $n_wt * $n_bt**2 ) * ( $n_bt**2 - 1 ) * ( $n_bt + 1 ) );
158 1         18 my $p_value = chdtrc( 1, $chi ); # Math::Cephes fn
159 1   50     6 $args{'tails'} ||= 2;
160 1 50       4 $p_value /= 2 if $args{'tails'} == 1;
161 1 50       5 return wantarray ? ( $chi, 1, ( $n_bt * $n_wt ), $p_value ) : $p_value;
162             }
163            
164             =head2 chiprob_str
165            
166             $str = $page->chiprob_str(data => HOA, correct_ties => 1);
167            
168             Performs the same test as for L but returns not an array but a string of the conventional reporting form, e.g., chi^2(df, N = total observations) = chi_value, p = p_value.
169            
170             =cut
171            
172             sub chiprob_str {
173 0     0 1 0 my ( $self, %args ) = ( shift, @_ );
174 0         0 my ( $chi_value, $df, $count, $p_value ) = $self->chiprob_test(%args);
175 0         0 return "chi^2($df, N = $count) = $chi_value, p = $p_value";
176             }
177            
178             sub _calc_l_value {
179 3     3   2 my $data = shift;
180 3         14 my $ranks = Statistics::Data::Rank->sum_of_ranks_within( data => $data );
181 3         696 my $c = 0;
182 15         24 return sum0(
183 22         21 map { ++$c * $ranks->{$_} }
184 3         4 sort { $a <=> $b } keys %{$ranks}
  3         9  
185             );
186             }
187            
188             sub _calc_l2r_value {
189 0     0   0 my $data = shift;
190 0         0 my $l = _calc_l_value($data);
191 0         0 my $n_bt = scalar keys %{$data};
  0         0  
192 0         0 my $n_wt = __PACKAGE__->equal_n( data => $data );
193 0         0 return ( ( ( 12 * $l ) / ( $n_wt * $n_bt * ( $n_bt**2 - 1 ) ) ) -
194             ( ( 3 * ( $n_bt + 1 ) ) / ( $n_bt - 1 ) ) );
195             }
196            
197             sub _calc_l_exp {
198 2     2   3 my $data = shift;
199 2         2 my $n_bt = scalar keys %{$data};
  2         3  
200 2         12 my $n_wt = __PACKAGE__->equal_n( data => $data );
201 2         73 return ( $n_wt * $n_bt * ( $n_bt + 1 )**2 ) / 4;
202             }
203            
204             sub _calc_l_var {
205 2     2   3 my $data = shift;
206 2         2 my $n_bt = scalar keys %{$data};
  2         3  
207 2         5 my $n_wt = __PACKAGE__->equal_n( data => $data );
208 2         44 return ( $n_wt * $n_bt**2 * ( $n_bt + 1 ) * ( $n_bt**2 - 1 ) ) / 144;
209             }
210            
211             sub _get_data {
212 5     5   5 my ( $self, %args ) = @_;
213 5         6 my $hoa;
214 5 50       8 if ( ref $args{'data'} ) {
215 0         0 $hoa = delete $args{'data'};
216             }
217             else {
218 5         16 $hoa = $self->get_hoa_by_lab(%args);
219             }
220 5         225 return $hoa;
221             }
222            
223             =head1 REFERENCES
224            
225             Hollander, M., & Wolfe, D. A. (1999). I. New York, NY, US: Wiley.
226            
227             Page, E. B. (1963). Ordered hypotheses for multiple treatments: A significance test for linear ranks. I, I<58>, 216-230. doi: L<10.1080/01621459.1963.10500843|http://dx.doi.org/10.1080/01621459.1963.10500843>. [L]
228            
229             =head1 DEPENDENCIES
230            
231             L : provides the handy sum0() function
232            
233             L : used for probability functions.
234            
235             L : used as a C for caching and retrieving data.
236            
237             L : used to implement cross-case ranking.
238            
239             L : for z-testing with optional continuity correction and tailing.
240            
241             =head1 AUTHOR
242            
243             Roderick Garton, C<< >>
244            
245             =head1 BUGS
246            
247             Please report any bugs or feature requests to C, or through
248             the web interface at L. I will be notified, and then you'll
249             automatically be notified of progress on your bug as I make changes.
250            
251             =head1 SUPPORT
252            
253             You can find documentation for this module with the perldoc command.
254            
255             perldoc Statistics::ANOVA::Page
256            
257            
258             You can also look for information at:
259            
260             =over 4
261            
262             =item * RT: CPAN's request tracker (report bugs here)
263            
264             L
265            
266             =item * AnnoCPAN: Annotated CPAN documentation
267            
268             L
269            
270             =item * CPAN Ratings
271            
272             L
273            
274             =item * Search CPAN
275            
276             L
277            
278             =back
279            
280            
281             =head1 ACKNOWLEDGEMENTS
282            
283            
284             =head1 LICENSE AND COPYRIGHT
285            
286             Copyright 2015 Roderick Garton.
287            
288             This program is free software; you can redistribute it and/or modify it
289             under the terms of either: the GNU General Public License as published
290             by the Free Software Foundation; or the Artistic License.
291            
292             See L for more information.
293            
294            
295             =cut
296            
297             1; # End of Statistics::ANOVA::Page