File Coverage

blib/lib/Statistics/ANOVA/Friedman.pm

Criterion	Covered	Total	%
statement	48	66	72.7
branch	8	18	44.4
condition	4	15	26.6
subroutine	13	16	81.2
pod	4	4	100.0
total	77	119	64.7

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::ANOVA::Friedman;
2
3	5			5		76349	use 5.006;
	5					16
	5					174
4	5			5		27	use strict;
	5					9
	5					156
5	5			5		17	use warnings FATAL => 'all';
	5					12
	5					178
6	5			5		20	use base qw(Statistics::Data);
	5					4
	5					2844
7	5			5		103351	use Carp qw(croak carp);
	5					9
	5					230
8	5			5		21	use List::AllUtils qw(sum0);
	5					7
	5					163
9	5			5		2372	use Math::Cephes qw(:dists);
	5					22457
	5					1414
10	5			5		2278	use Statistics::Data::Rank;
	5					13939
	5					2876
11
12							=head1 NAME
13
14							Statistics::ANOVA::Friedman - nonparametric repeated measures analysis of variance for nominal variables (Friedman Test)
15
16							=head1 VERSION
17
18							This is documentation for version 0.01, released February 2015.
19
20							=cut
21
22							our $VERSION = '0.01';
23
24							=head1 SYNOPSIS
25
26							use Statistics::ANOVA::Friedman;
27							my $fried = Statistics::ANOVA::Friedman->new();
28							my ($chi_value, $df, $count, $p_value) = $fried->chiprob_test(data => HOA);
29							$fried->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11]}); # or pre-load with HOA
30							($chi_value, $df, $count, $p_value) = $fried->chiprob_test();
31							my ($f_value, $df_b, $df_w, $p_value2) = $fried->fprob_test();
32
33							=head2 DESCRIPTION
34
35							Performs the B nonparametric analysis of variance - for two or more dependent (matched, related) nominal variables. A ranking procedure is used, but, unlike the case for independent variables, the ranks are taken at each common index of each variable, i.e., within-groups, given that values at each index are given by the same data-source (person, plot, etc.).
36
37							By default, the method accounts for and corrects for ties, but if B => 0, the test-statistic is uncorrected. The correction involves accounting for the number of tied variables at each index, as per Hollander & Wolfe (1995), Eq. 7.8, p. 274.
38
39							The module uses L as a base so that data can be pre-loaded and otherwise manipulated/queried per that module's methods.
40
41							Correctness of output is tested on installation using example data from Hollander & Wolfe (1999, p. 274ff), Rice (1995, p. 470), Sarantakos (1993, p. 404-405), and Siegal (1956, p. 167ff); tests fail if the published chi-values and degrees-of-freedom are not returned by the module.
42
43							=head1 SUBROUTINES/METHODS
44
45							=head2 chiprob_test
46
47							($chi_value, $df, $count, $p_value) = $fried->chiprob_test(data => HOA, correct_ties => 1);
48
49							Performs the ANOVA and returns the chi-square value, its degrees-of-freedom, the total number of observations, and associated probability value (or only the latter if called in scalar context). Default value of optional argument B is 1.
50
51							=cut
52
53							sub chiprob_test {
54	5			5	1	3914	my ( $self, %args ) = (shift, @_);
55	5	100				41	my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
56	5					165	my $n_bt = scalar keys %{$data};
	5					11
57	5					28	my $n_wt = $self->equal_n(data => $data);
58	5	50	33			157	croak 'Need to have equal numbers of observations greater than 1 per two or more variables for chiprob_test' if not $n_wt or $n_wt == 1or $n_bt < 2;
			33
59	5	100				19	my $chi =
60							_definitely_no($args{'correct_ties'})
61							? _chi_ig_ties( $n_bt, $n_wt, scalar Statistics::Data::Rank->sumsq_ranks_within(data => $data) )
62							: _chi_by_ties( $n_bt, $n_wt, Statistics::Data::Rank->sumsq_ranks_within(data => $data) );
63	5					18	my $df = $n_bt - 1;
64	5					111	my $p_value = chdtrc( $df, $chi ); # Math::Cephes fn
65	5	50				26	return wantarray ? ($chi, $df, ($n_bt * $n_wt), $p_value) : $p_value;
66							}
67
68							=head2 chiprob_str
69
70							$str = $fried->chiprob_str(data => HOA, correct_ties => 1);
71
72							Performs the same test as for L but returns not an array but a string of the conventional reporting form, e.g., chi^2(df, N = total observations) = chi_value, p = p_value.
73
74							=cut
75
76							sub chiprob_str {
77	0			0	1	0	my ( $self, %args ) = (shift, @_);
78	0					0	my ($chi_value, $df, $count, $p_value) = $self->chiprob_test(%args);
79	0					0	return "chi^2($df, N = $count) = $chi_value, p = $p_value";
80							}
81
82							=head2 fprob_test
83
84							($f_value, $df_b, $df_w, $p_value) = $fried->fprob_test(data => HOA);
85							$p_value = $fried->fprob_test(data => HOA);
86
87							Performs the same test as above but transforms the chi-value into an F-distributed value, returning this F-equivalent value, between and within groups degrees-of-freedom, and then the associated probability off the F-distribution (or only the latter if called in scalar context). Default value of optional argument B is 1. This method has not been tested against sample data as yet.
88
89							=cut
90
91							sub fprob_test {
92	0			0	1	0	my ( $self, %args ) = (shift, @_);
93	0	0				0	my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
94	0					0	my $n_bt = scalar keys %{$data};
	0					0
95	0					0	my $n_wt = $self->equal_n(data => $data);
96	0	0	0			0	croak 'Need to have equal numbers of observations greater than 1 per two or more variables for fprob_test' if not $n_wt or $n_wt == 1 or $n_bt < 2;
			0
97	0	0				0	my $chi =
98							_definitely_no($args{'correct_ties'})
99							? _chi_ig_ties( $n_bt, $n_wt, scalar Statistics::Data::Rank->sumsq_ranks_within(data => $data) )
100							: _chi_by_ties( $n_bt, $n_wt, Statistics::Data::Rank->sumsq_ranks_within(data => $data) );
101	0					0	my $f_value = ( ( $n_wt - 1 ) * $chi ) / ( $n_wt * ( $n_bt - 1 ) - $chi );
102	0					0	my $df_b = $n_bt - 1;
103	0					0	my $df_w = ( $n_wt - 1 ) * ($df_b);
104	0					0	my $p_value = fdtrc( $df_b, $df_w, $f_value ); # Math::Cephes fn
105	0	0				0	return wantarray ? ($f_value, $df_b, $df_w, $p_value) : $p_value;
106							}
107
108							=head2 fprob_str
109
110							$str = $fried->chiprob_str(data => HOA, correct_ties => 1);
111
112							Performs the same test as for L but returns not an array but a string of the conventional reporting form, e.g., F(df_b, df_w) = f_value, p = p_value.
113
114							=cut
115
116							sub fprob_str {
117	0			0	1	0	my ( $self, %args ) = (shift, @_);
118	0					0	my ($f_value, $df_b, $df_w, $p_value) = $self->fprob_test(%args);
119	0					0	return "F($df_b, $df_w) = $f_value, p = $p_value";
120							}
121
122							sub _chi_ig_ties {
123	1			1		724	my ( $c, $n, $sumsq ) = @_;
124	1					4	return ( 12 / ( $n * $c * ( $c + 1 ) ) ) * $sumsq - 3 * $n * ( $c + 1 );
125							}
126
127							sub _chi_by_ties {
128	4			4		1909	my ( $c, $n, $sumsq, $xtied ) = @_;
129	4					17	my $num = 12 * $sumsq - 3 * $n*2 $c * ( $c + 1 )**2;
130	4					8	my $sum = sum0( map { _sumcubes( $_ ) - $c } values %{$xtied} );
	40					43
	4					10
131	4					12	my $den = $n * $c * ( $c + 1 ) - ( 1 / ( $c - 1 ) ) * $sum;
132	4					5	my $chi = $num / $den;
133	4					8	return $chi;
134							}
135
136							sub _sumcubes {
137	40			40		26	return sum0( map { $_**3 } @{ shift @_ } );
	158					179
	40					42
138							}
139
140							sub _definitely_no {
141	5	100	66	5		51	return ( defined $_[0] and $_[0] == 0 ) ? 1 : 0;
142							}
143
144							=head1 DEPENDENCIES
145
146							L : used for summing.
147
148							L : used for probability functions.
149
150							L : used as base.
151
152							L : used to calculate the dependent sum-square of ranks. See this module for retrieving the actual arrays of ranks and sum-squares.
153
154							=head1 DIAGNOSTICS
155
156							=over 4
157
158							=item Need to have equal numbers of observations greater than 1 per two or variables for chiprob_test
159
160							Ced if there are not equal numbers of numerical values in each given variable, and if there are not at least two variables. Similarly for fprob_test.
161
162							=back
163
164							=head1 REFERENCES
165
166							Hollander, M., & Wolfe, D. A. (1999). I. New York, NY, US: Wiley.
167
168							Rice, J. A. (1995). I. Belmont, CA, US: Duxbury.
169
170							Sarantakos, S. (1993). I. Melbourne, Australia: MacMillan.
171
172							Siegal, S. (1956). I. New York, NY, US: McGraw-Hill
173
174							=head1 AUTHOR
175
176							Roderick Garton, C<< >>
177
178							=head1 BUGS
179
180							Please report any bugs or feature requests to C, or through
181							the web interface at L. I will be notified, and then you'll
182							automatically be notified of progress on your bug as I make changes.
183
184							=head1 SUPPORT
185
186							You can find documentation for this module with the perldoc command.
187
188							perldoc Statistics::ANOVA::Friedman
189
190							You can also look for information at:
191
192							=over 4
193
194							=item * RT: CPAN's request tracker (report bugs here)
195
196							L
197
198							=item * AnnoCPAN: Annotated CPAN documentation
199
200							L
201
202							=item * CPAN Ratings
203
204							L
205
206							=item * Search CPAN
207
208							L
209
210							=back
211
212							=head1 LICENSE AND COPYRIGHT
213
214							Copyright 2015 Roderick Garton.
215
216							This program is free software; you can redistribute it and/or modify it
217							under the terms of either: the GNU General Public License as published
218							by the Free Software Foundation; or the Artistic License.
219
220							See L for more information.
221
222							=cut
223
224							1; # End of Statistics::ANOVA::Friedman