File Coverage

blib/lib/Statistics/ANOVA/Friedman.pm
Criterion Covered Total %
statement 48 66 72.7
branch 8 18 44.4
condition 4 15 26.6
subroutine 13 16 81.2
pod 4 4 100.0
total 77 119 64.7


line stmt bran cond sub pod time code
1             package Statistics::ANOVA::Friedman;
2            
3 5     5   76349 use 5.006;
  5         16  
  5         174  
4 5     5   27 use strict;
  5         9  
  5         156  
5 5     5   17 use warnings FATAL => 'all';
  5         12  
  5         178  
6 5     5   20 use base qw(Statistics::Data);
  5         4  
  5         2844  
7 5     5   103351 use Carp qw(croak carp);
  5         9  
  5         230  
8 5     5   21 use List::AllUtils qw(sum0);
  5         7  
  5         163  
9 5     5   2372 use Math::Cephes qw(:dists);
  5         22457  
  5         1414  
10 5     5   2278 use Statistics::Data::Rank;
  5         13939  
  5         2876  
11            
12             =head1 NAME
13            
14             Statistics::ANOVA::Friedman - nonparametric repeated measures analysis of variance for nominal variables (Friedman Test)
15            
16             =head1 VERSION
17            
18             This is documentation for version 0.01, released February 2015.
19            
20             =cut
21            
22             our $VERSION = '0.01';
23            
24             =head1 SYNOPSIS
25            
26             use Statistics::ANOVA::Friedman;
27             my $fried = Statistics::ANOVA::Friedman->new();
28             my ($chi_value, $df, $count, $p_value) = $fried->chiprob_test(data => HOA);
29             $fried->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11]}); # or pre-load with HOA
30             ($chi_value, $df, $count, $p_value) = $fried->chiprob_test();
31             my ($f_value, $df_b, $df_w, $p_value2) = $fried->fprob_test();
32            
33             =head2 DESCRIPTION
34            
35             Performs the B nonparametric analysis of variance - for two or more dependent (matched, related) nominal variables. A ranking procedure is used, but, unlike the case for independent variables, the ranks are taken at each common index of each variable, i.e., within-groups, given that values at each index are given by the same data-source (person, plot, etc.).
36            
37             By default, the method accounts for and corrects for ties, but if B => 0, the test-statistic is uncorrected. The correction involves accounting for the number of tied variables at each index, as per Hollander & Wolfe (1995), Eq. 7.8, p. 274.
38            
39             The module uses L as a base so that data can be pre-loaded and otherwise manipulated/queried per that module's methods.
40            
41             Correctness of output is tested on installation using example data from Hollander & Wolfe (1999, p. 274ff), Rice (1995, p. 470), Sarantakos (1993, p. 404-405), and Siegal (1956, p. 167ff); tests fail if the published chi-values and degrees-of-freedom are not returned by the module.
42            
43             =head1 SUBROUTINES/METHODS
44            
45             =head2 chiprob_test
46            
47             ($chi_value, $df, $count, $p_value) = $fried->chiprob_test(data => HOA, correct_ties => 1);
48            
49             Performs the ANOVA and returns the chi-square value, its degrees-of-freedom, the total number of observations, and associated probability value (or only the latter if called in scalar context). Default value of optional argument B is 1.
50            
51             =cut
52            
53             sub chiprob_test {
54 5     5 1 3914 my ( $self, %args ) = (shift, @_);
55 5 100       41 my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
56 5         165 my $n_bt = scalar keys %{$data};
  5         11  
57 5         28 my $n_wt = $self->equal_n(data => $data);
58 5 50 33     157 croak 'Need to have equal numbers of observations greater than 1 per two or more variables for chiprob_test' if not $n_wt or $n_wt == 1or $n_bt < 2;
      33        
59 5 100       19 my $chi =
60             _definitely_no($args{'correct_ties'})
61             ? _chi_ig_ties( $n_bt, $n_wt, scalar Statistics::Data::Rank->sumsq_ranks_within(data => $data) )
62             : _chi_by_ties( $n_bt, $n_wt, Statistics::Data::Rank->sumsq_ranks_within(data => $data) );
63 5         18 my $df = $n_bt - 1;
64 5         111 my $p_value = chdtrc( $df, $chi ); # Math::Cephes fn
65 5 50       26 return wantarray ? ($chi, $df, ($n_bt * $n_wt), $p_value) : $p_value;
66             }
67            
68             =head2 chiprob_str
69            
70             $str = $fried->chiprob_str(data => HOA, correct_ties => 1);
71            
72             Performs the same test as for L but returns not an array but a string of the conventional reporting form, e.g., chi^2(df, N = total observations) = chi_value, p = p_value.
73            
74             =cut
75            
76             sub chiprob_str {
77 0     0 1 0 my ( $self, %args ) = (shift, @_);
78 0         0 my ($chi_value, $df, $count, $p_value) = $self->chiprob_test(%args);
79 0         0 return "chi^2($df, N = $count) = $chi_value, p = $p_value";
80             }
81            
82             =head2 fprob_test
83            
84             ($f_value, $df_b, $df_w, $p_value) = $fried->fprob_test(data => HOA);
85             $p_value = $fried->fprob_test(data => HOA);
86            
87             Performs the same test as above but transforms the chi-value into an F-distributed value, returning this F-equivalent value, between and within groups degrees-of-freedom, and then the associated probability off the F-distribution (or only the latter if called in scalar context). Default value of optional argument B is 1. This method has not been tested against sample data as yet.
88            
89             =cut
90            
91             sub fprob_test {
92 0     0 1 0 my ( $self, %args ) = (shift, @_);
93 0 0       0 my $data = $args{'data'} ? delete $args{'data'} : $self->get_hoa(%args);
94 0         0 my $n_bt = scalar keys %{$data};
  0         0  
95 0         0 my $n_wt = $self->equal_n(data => $data);
96 0 0 0     0 croak 'Need to have equal numbers of observations greater than 1 per two or more variables for fprob_test' if not $n_wt or $n_wt == 1 or $n_bt < 2;
      0        
97 0 0       0 my $chi =
98             _definitely_no($args{'correct_ties'})
99             ? _chi_ig_ties( $n_bt, $n_wt, scalar Statistics::Data::Rank->sumsq_ranks_within(data => $data) )
100             : _chi_by_ties( $n_bt, $n_wt, Statistics::Data::Rank->sumsq_ranks_within(data => $data) );
101 0         0 my $f_value = ( ( $n_wt - 1 ) * $chi ) / ( $n_wt * ( $n_bt - 1 ) - $chi );
102 0         0 my $df_b = $n_bt - 1;
103 0         0 my $df_w = ( $n_wt - 1 ) * ($df_b);
104 0         0 my $p_value = fdtrc( $df_b, $df_w, $f_value ); # Math::Cephes fn
105 0 0       0 return wantarray ? ($f_value, $df_b, $df_w, $p_value) : $p_value;
106             }
107            
108             =head2 fprob_str
109            
110             $str = $fried->chiprob_str(data => HOA, correct_ties => 1);
111            
112             Performs the same test as for L but returns not an array but a string of the conventional reporting form, e.g., F(df_b, df_w) = f_value, p = p_value.
113            
114             =cut
115            
116             sub fprob_str {
117 0     0 1 0 my ( $self, %args ) = (shift, @_);
118 0         0 my ($f_value, $df_b, $df_w, $p_value) = $self->fprob_test(%args);
119 0         0 return "F($df_b, $df_w) = $f_value, p = $p_value";
120             }
121            
122             sub _chi_ig_ties {
123 1     1   724 my ( $c, $n, $sumsq ) = @_;
124 1         4 return ( 12 / ( $n * $c * ( $c + 1 ) ) ) * $sumsq - 3 * $n * ( $c + 1 );
125             }
126            
127             sub _chi_by_ties {
128 4     4   1909 my ( $c, $n, $sumsq, $xtied ) = @_;
129 4         17 my $num = 12 * $sumsq - 3 * $n**2 * $c * ( $c + 1 )**2;
130 4         8 my $sum = sum0( map { _sumcubes( $_ ) - $c } values %{$xtied} );
  40         43  
  4         10  
131 4         12 my $den = $n * $c * ( $c + 1 ) - ( 1 / ( $c - 1 ) ) * $sum;
132 4         5 my $chi = $num / $den;
133 4         8 return $chi;
134             }
135            
136             sub _sumcubes {
137 40     40   26 return sum0( map { $_**3 } @{ shift @_ } );
  158         179  
  40         42  
138             }
139            
140             sub _definitely_no {
141 5 100 66 5   51 return ( defined $_[0] and $_[0] == 0 ) ? 1 : 0;
142             }
143            
144             =head1 DEPENDENCIES
145            
146             L : used for summing.
147            
148             L : used for probability functions.
149            
150             L : used as base.
151            
152             L : used to calculate the dependent sum-square of ranks. See this module for retrieving the actual arrays of ranks and sum-squares.
153            
154             =head1 DIAGNOSTICS
155            
156             =over 4
157            
158             =item Need to have equal numbers of observations greater than 1 per two or variables for chiprob_test
159            
160             Ced if there are not equal numbers of numerical values in each given variable, and if there are not at least two variables. Similarly for fprob_test.
161            
162             =back
163            
164             =head1 REFERENCES
165            
166             Hollander, M., & Wolfe, D. A. (1999). I. New York, NY, US: Wiley.
167            
168             Rice, J. A. (1995). I. Belmont, CA, US: Duxbury.
169            
170             Sarantakos, S. (1993). I. Melbourne, Australia: MacMillan.
171            
172             Siegal, S. (1956). I. New York, NY, US: McGraw-Hill
173            
174             =head1 AUTHOR
175            
176             Roderick Garton, C<< >>
177            
178             =head1 BUGS
179            
180             Please report any bugs or feature requests to C, or through
181             the web interface at L. I will be notified, and then you'll
182             automatically be notified of progress on your bug as I make changes.
183            
184             =head1 SUPPORT
185            
186             You can find documentation for this module with the perldoc command.
187            
188             perldoc Statistics::ANOVA::Friedman
189            
190             You can also look for information at:
191            
192             =over 4
193            
194             =item * RT: CPAN's request tracker (report bugs here)
195            
196             L
197            
198             =item * AnnoCPAN: Annotated CPAN documentation
199            
200             L
201            
202             =item * CPAN Ratings
203            
204             L
205            
206             =item * Search CPAN
207            
208             L
209            
210             =back
211            
212             =head1 LICENSE AND COPYRIGHT
213            
214             Copyright 2015 Roderick Garton.
215            
216             This program is free software; you can redistribute it and/or modify it
217             under the terms of either: the GNU General Public License as published
218             by the Free Software Foundation; or the Artistic License.
219            
220             See L for more information.
221            
222             =cut
223            
224             1; # End of Statistics::ANOVA::Friedman