File Coverage

blib/lib/Statistics/ANOVA/Page.pm

Criterion	Covered	Total	%
statement	69	81	85.1
branch	4	8	50.0
condition	1	2	50.0
subroutine	17	20	85.0
pod	7	7	100.0
total	98	118	83.0

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::ANOVA::Page;
2
3	2			2		33190	use 5.006;
	2					5
	2					80
4	2			2		9	use strict;
	2					2
	2					65
5	2			2		6	use warnings FATAL => 'all';
	2					8
	2					77
6	2			2		12	use base qw(Statistics::Data);
	2					1
	2					1188
7	2			2		50727	use List::AllUtils qw(sum0);
	2					4
	2					111
8	2			2		1199	use Math::Cephes qw(:dists);
	2					10950
	2					711
9	2			2		1066	use Statistics::Data::Rank;
	2					6080
	2					55
10	2			2		1063	use Statistics::Zed;
	2					4309
	2					1418
11
12							=head1 NAME
13
14							Statistics::ANOVA::Page - Nonparametric analysis of variance by ranks for trend across dependent variables (Page and sign tests).
15
16							=head1 VERSION
17
18							Version 0.01
19
20							=cut
21
22							our $VERSION = '0.01';
23
24							=head1 SYNOPSIS
25
26							use Statistics::ANOVA::Page;
27							my $page = Statistics::ANOVA::Page->new();
28							$page->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11, 16]}); # note ordinal datanames
29							my $l_value = $page->observed(); # or expected(), variance()
30							my ($z_value, $p_value) = $page->zprob_test(ccorr => 2, tails => 1);
31							# or without pre-loading:
32							$l_value = $page->observed(data => {1 => [2, 4, 6], 2 => [5, 3, 12]});
33							# or for subset of loaded data:
34							$l_value = $page->observed(lab => [1, 3]);
35
36							=head1 DESCRIPTION
37
38							Calculates Page statistics for nonparametric analysis of variance across given orders of dependent variables. Ranks are computed exactly as for the L test, but the ranks are weighted according to the ordinal position of the group/level to which they pertain. Also, the test of significance is based on a standardized value, with the I -value read off the normal distribution. Similarly to the relationship between the Kruskal-Wallis and L tests for non-dependent observations, the Friedman test returns the same value regardless of the ordinality of the variables as levels, but the Page test tests ranks in association with the ordinality of the variables (as levels rather than groups). These are weighted according to their Perl sort { $a <=> $b} order, so they should have sort-able names that reflect the ordering of the variables.
39
40							With only two groups, the test statistic is equivalent to that provided by a B.
41
42							Build tests include comparison of return values with published data, viz. from Hollander and Wolfe (1999, p. 286ff); passing these tests means the results agree.
43
44							=head1 SUBROUTINES/METHODS
45
46							=head2 new
47
48							$page = Statistics::ANOVA::Page->new();
49
50							New object for accessing methods and storing results. This "isa" Statistics::Data object.
51
52							=head2 load, add, unload
53
54							$page->load(1 => [1, 4], 2 => [3, 7]);
55
56							The given data can now be used by any of the following methods. This is inherited from L, and all its other methods are available here via the class object. Only passing of data as a hash of arrays (HOA) is supported for now. Alternatively, give each of the following methods the HOA for the optional named argument B.
57
58							=head2 observed
59
60							$val = $page->observed(); # data pre-loaded
61							$val = $page->observed(data => $hashref_of_arefs);
62
63							Returns the observed statistic I based on within-group rankings of the data weighted according to the ordinal position of the variable (by its numerical name) to which they pertain.
64
65							Optionally, if the data have not been pre-loaded, send as named argument B.
66
67							=cut
68
69							sub observed {
70	1			1	1	802	my ( $self, %args ) = @_;
71	1					5	return _calc_l_value( _get_data( $self, %args ) );
72							}
73
74							=head2 observed_r
75
76							$val = $page->observed_r(); # data pre-loaded
77							$val = $page->observed_r(data => $hashref_of_arefs);
78
79							This implements a "l2r" transformation: Hollander and Wolfe (1999) describe how Page's I-statistic is directly related to Spearman's rank-order correlation coefficient (see L), based on the observed and predicted order of each associated group/level per observation.
80
81							=cut
82
83							sub observed_r {
84	0			0	1	0	my ( $self, %args ) = @_;
85	0					0	return _calc_l2r_value( _get_data( $self, %args ) );
86							}
87
88							=head2 expected
89
90							$val = $page->expected(); # data pre-loaded
91							$val = $page->expected(data => $hashref_of_arefs);
92
93							Returns the expected value of the I statistic for the given data.
94
95							=cut
96
97							sub expected {
98	1			1	1	7	my ( $self, %args ) = @_;
99	1					3	return _calc_l_exp( _get_data( $self, %args ) );
100							}
101
102							=head2 variance
103
104							$val = $page->variance(); # data pre-loaded
105							$val = $page->variance(data => $hashref_of_arefs);
106
107							Return the variance expected to occur in the I values for the given data.
108
109							=cut
110
111							sub variance {
112	1			1	1	5	my ( $self, %args ) = @_;
113	1					2	return _calc_l_var( _get_data( $self, %args ) );
114							}
115
116							=head2 zprob_test
117
118							$p_val = $page->zprob_test(); # data pre-loaded
119							$p_val = $page->zprob_test(data => $hashref_of_arefs);
120							($z_val, $p_val) = $page->zprob_test(); # get z-score too
121
122							Calculates an expected I value and variance, to provide a normalized I for which the I -value is read off the normal distribution. This is appropriate for "large" samples. Optional arguments are B and B as in L.
123
124							=cut
125
126							sub zprob_test {
127	1			1	1	5	my ( $self, %args ) = @_;
128	1					4	my $href = _get_data( $self, %args );
129	1					6	my $zed = Statistics::Zed->new();
130	1					24	my ( $z_value, $p_value ) = $zed->z_value(
131							observed => _calc_l_value($href),
132							expected => _calc_l_exp($href),
133							variance => _calc_l_var($href),
134							%args
135							);
136	1	50				173	return wantarray ? ( $z_value, $p_value ) : $p_value;
137							}
138
139							=head2 chiprob_test
140
141							$p_val = $page->chiprob_test(); # data pre-loaded
142							$p_val = $page->chiprob_test(data => $hashref_of_arefs);
143							($chi_val, $p_val) = $page->chiprob_test(); # get z-score too
144
145							Calculates a chi-square statistic based on the observed value of , the number of ranked variables, and the number of replications; as per Page(1963, Eq. 4). This is a two-tailed test; if the optional argument B => 1, the returned probability, read off the chi-square distribution, is halved.
146
147							=cut
148
149							sub chiprob_test {
150	1			1	1	1703	my ( $self, %args ) = @_;
151	1					4	my $data_href = _get_data( $self, %args );
152	1					3	my $l = _calc_l_value($data_href);
153	1					2	my $n_bt = scalar keys %{$data_href};
	1					2
154	1					4	my $n_wt = __PACKAGE__->equal_n( data => $data_href );
155	1					22	my $num = ( ( 12 * $l ) - ( 3 * $n_wt * $n_bt ) * ( $n_bt + 1 )2 )2;
156	1					3	my $chi =
157							$num / ( ( $n_wt * $n_bt*2 ) ( $n_bt*2 - 1 ) ( $n_bt + 1 ) );
158	1					18	my $p_value = chdtrc( 1, $chi ); # Math::Cephes fn
159	1		50			6	$args{'tails'} \|\|= 2;
160	1	50				4	$p_value /= 2 if $args{'tails'} == 1;
161	1	50				5	return wantarray ? ( $chi, 1, ( $n_bt * $n_wt ), $p_value ) : $p_value;
162							}
163
164							=head2 chiprob_str
165
166							$str = $page->chiprob_str(data => HOA, correct_ties => 1);
167
168							Performs the same test as for L but returns not an array but a string of the conventional reporting form, e.g., chi^2(df, N = total observations) = chi_value, p = p_value.
169
170							=cut
171
172							sub chiprob_str {
173	0			0	1	0	my ( $self, %args ) = ( shift, @_ );
174	0					0	my ( $chi_value, $df, $count, $p_value ) = $self->chiprob_test(%args);
175	0					0	return "chi^2($df, N = $count) = $chi_value, p = $p_value";
176							}
177
178							sub _calc_l_value {
179	3			3		2	my $data = shift;
180	3					14	my $ranks = Statistics::Data::Rank->sum_of_ranks_within( data => $data );
181	3					696	my $c = 0;
182	15					24	return sum0(
183	22					21	map { ++$c * $ranks->{$_} }
184	3					4	sort { $a <=> $b } keys %{$ranks}
	3					9
185							);
186							}
187
188							sub _calc_l2r_value {
189	0			0		0	my $data = shift;
190	0					0	my $l = _calc_l_value($data);
191	0					0	my $n_bt = scalar keys %{$data};
	0					0
192	0					0	my $n_wt = __PACKAGE__->equal_n( data => $data );
193	0					0	return ( ( ( 12 * $l ) / ( $n_wt * $n_bt * ( $n_bt**2 - 1 ) ) ) -
194							( ( 3 * ( $n_bt + 1 ) ) / ( $n_bt - 1 ) ) );
195							}
196
197							sub _calc_l_exp {
198	2			2		3	my $data = shift;
199	2					2	my $n_bt = scalar keys %{$data};
	2					3
200	2					12	my $n_wt = __PACKAGE__->equal_n( data => $data );
201	2					73	return ( $n_wt * $n_bt * ( $n_bt + 1 )**2 ) / 4;
202							}
203
204							sub _calc_l_var {
205	2			2		3	my $data = shift;
206	2					2	my $n_bt = scalar keys %{$data};
	2					3
207	2					5	my $n_wt = __PACKAGE__->equal_n( data => $data );
208	2					44	return ( $n_wt * $n_bt*2 ( $n_bt + 1 ) * ( $n_bt**2 - 1 ) ) / 144;
209							}
210
211							sub _get_data {
212	5			5		5	my ( $self, %args ) = @_;
213	5					6	my $hoa;
214	5	50				8	if ( ref $args{'data'} ) {
215	0					0	$hoa = delete $args{'data'};
216							}
217							else {
218	5					16	$hoa = $self->get_hoa_by_lab(%args);
219							}
220	5					225	return $hoa;
221							}
222
223							=head1 REFERENCES
224
225							Hollander, M., & Wolfe, D. A. (1999). I. New York, NY, US: Wiley.
226
227							Page, E. B. (1963). Ordered hypotheses for multiple treatments: A significance test for linear ranks. I, I<58>, 216-230. doi: L<10.1080/01621459.1963.10500843\|http://dx.doi.org/10.1080/01621459.1963.10500843>. [L]
228
229							=head1 DEPENDENCIES
230
231							L : provides the handy sum0() function
232
233							L : used for probability functions.
234
235							L : used as a C for caching and retrieving data.
236
237							L : used to implement cross-case ranking.
238
239							L : for z-testing with optional continuity correction and tailing.
240
241							=head1 AUTHOR
242
243							Roderick Garton, C<< >>
244
245							=head1 BUGS
246
247							Please report any bugs or feature requests to C, or through
248							the web interface at L. I will be notified, and then you'll
249							automatically be notified of progress on your bug as I make changes.
250
251							=head1 SUPPORT
252
253							You can find documentation for this module with the perldoc command.
254
255							perldoc Statistics::ANOVA::Page
256
257
258							You can also look for information at:
259
260							=over 4
261
262							=item * RT: CPAN's request tracker (report bugs here)
263
264							L
265
266							=item * AnnoCPAN: Annotated CPAN documentation
267
268							L
269
270							=item * CPAN Ratings
271
272							L
273
274							=item * Search CPAN
275
276							L
277
278							=back
279
280
281							=head1 ACKNOWLEDGEMENTS
282
283
284							=head1 LICENSE AND COPYRIGHT
285
286							Copyright 2015 Roderick Garton.
287
288							This program is free software; you can redistribute it and/or modify it
289							under the terms of either: the GNU General Public License as published
290							by the Free Software Foundation; or the Artistic License.
291
292							See L for more information.
293
294
295							=cut
296
297							1; # End of Statistics::ANOVA::Page