File Coverage

blib/lib/Statistics/KruskalWallis.pm

Criterion	Covered	Total	%
statement	87	90	96.6
branch	6	12	50.0
condition	1	3	33.3
subroutine	10	10	100.0
pod	0	4	0.0
total	104	119	87.3

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::KruskalWallis;
2
3							$VERSION = '0.01';
4
5	1			1		20593	use strict;
	1					3
	1					41
6	1			1		5	use Carp;
	1					3
	1					83
7	1			1		1456	use Statistics::Distributions;
	1					9777
	1					894
8
9							##############
10							sub new
11							{
12	2			2	0	23	my $proto = shift;
13	2		33			15	my $class = ref($proto) \|\| $proto;
14	2					3	my $self= {};
15
16	2					7	$self->{sample_data} = undef;
17	2					4	$self->{rank_data} = undef;
18	2					3	$self->{no_of_sets} = 0;
19	2					4	$self->{no_of_samples} = 0;
20
21
22	2					5	bless($self,$class);
23	2					10	return $self;
24							}
25							##############
26
27							##############
28							sub load_data {
29	6			6	0	16	my $self = shift;
30	6					9	my $sample_name = shift;
31
32	6					12	my (@sample_data)=@_;
33
34	6					11	$self->{no_of_samples}+=@sample_data;;
35	6					11	$self->{no_of_sets} = $self->{no_of_sets} + 1;
36	6					15	$self->{sample_data}->{$sample_name}=\@sample_data;
37	6					17	$self->{rank_data}->{$sample_name}->{sum} = 0;
38	6					12	$self->{rank_data}->{$sample_name}->{n}=0;
39
40	6					18	return 1;
41							} # end sub load_data
42							##############
43
44							##############
45							sub perform_kruskal_wallis_test {
46	1			1	0	3	my $self=shift;
47
48	1					2	my ($sample_name,$sample_data_element,$sample_data_value);
49	1					4	my ($grouped_data_ref) = $self->_group_data();
50	1					4	$self->_rank_data($grouped_data_ref);
51	1					6	my ($H) = $self->_calculate_H();
52	1					7	my ($chi_prob)=Statistics::Distributions::chisqrprob (($self->{no_of_sets}-1),$H);
53
54							# chi_prob only valid when no_of_sets > 3
55	1					80	return ($H,$chi_prob);
56							} # end sub
57							##############
58
59							##############
60							sub _group_data {
61
62	1			1		2	my $self = shift;
63	1					3	my (%grouped_data,$sample_name,$sample_data_element);
64
65	1					2	foreach $sample_name (keys(%{$self->{sample_data}})) {
	1					14
66	3					4	foreach $sample_data_element (@{$self->{sample_data}->{$sample_name}}){
	3					8
67	21	100				79	if (exists($grouped_data{$sample_data_element}))
68							{
69	2					3	push @{$grouped_data{$sample_data_element}}, $sample_name;
	2					14
70							} # end if
71							else
72							{
73	19					83	$grouped_data{$sample_data_element} = [$sample_name];
74							} # end else
75							} # end foreach sample_data_element
76							} # end foreach sample name
77
78	1					4	return (\%grouped_data);
79							} # end sub _group_data
80							##############
81
82							##############
83							sub _rank_data {
84
85	1			1		3	my $self = shift;
86	1					2	my $grouped_data_ref = shift;
87
88	1					2	my $rank = 1;
89	1					2	my ($sample_name,$sample_data_value);
90
91	1					2	foreach $sample_name (keys(%{$self->{sample_data}})) {
	1					5
92	3					6	$self->{rank_data}->{$sample_name}->{sum} = 0;
93	3					8	$self->{rank_data}->{$sample_name}->{n} = 0;
94							} # end foreach
95
96
97	1					11	foreach $sample_data_value (sort { $a <=> $b } (keys(%$grouped_data_ref))) {
	62					65
98
99	19	100				21	if (@{$$grouped_data_ref{$sample_data_value}} > 1) {$rank+=0.5;}
	19					46
	2					4
100
101	19					23	foreach $sample_name (@{$$grouped_data_ref{$sample_data_value}}) {
	19					29
102	21					34	$self->{rank_data}->{$sample_name}->{sum}+= $rank;
103	21					42	$self->{rank_data}->{$sample_name}->{n}++;
104							} # end foreach
105
106	19					34	$rank=int($rank+1.5);
107							} # end foreach
108							} # end sub
109							##############
110
111							##############
112							sub _calculate_H {
113
114	1			1		3	my $self = shift;
115							# calculate mean sum
116
117	1					2	my $sample_name;
118	1					1	my $mean_sq_sum = 0;
119
120	1					19	foreach $sample_name (keys(%{$self->{sample_data}})) {
	1					4
121	3					11	$mean_sq_sum += ($self->{rank_data}->{$sample_name}->{sum}**2) / $self->{rank_data}->{$sample_name}->{n};
122							} # end foreach samplename
123
124							# calculate kw statistic
125
126	1					4	my $H = 12 / ( $self->{no_of_samples} * ($self->{no_of_samples} + 1) );
127	1					2	$H = $H * $mean_sq_sum;
128	1					2	$H = $H - 3 * ($self->{no_of_samples} + 1);
129
130	1					3	return ($H);
131							} # end sub _calculate_H
132							##############
133
134							##############
135							sub post_hoc {
136
137	1			1	0	17	my $self = shift;
138	1					2	my $test_name = shift;
139	1					4	my ($control_group_name,$test_group_name)=@_;
140
141	1					2	my ($p_value,$q);
142
143							# one day may add further post-hoc tests
144	1	50				6	if ($test_name eq 'Newman-Keuls') {
145	1					4	my $SE = ( $self->{no_of_samples} * ($self->{no_of_samples} + 1) ) / 12;
146	1					5	$SE = $SE * ( 1/$self->{rank_data}->{$control_group_name}->{n} + 1/$self->{rank_data}->{$test_group_name}->{n});
147	1					22	$SE = $SE**0.5;
148
149	1					4	my $r1 = $self->{rank_data}->{$control_group_name}->{sum} / $self->{rank_data}->{$control_group_name}->{n};
150	1					4	my $r2 = $self->{rank_data}->{$test_group_name}->{sum} / $self->{rank_data}->{$test_group_name}->{n};
151
152	1					3	$q = ( $r1 - $r2 ) / $SE;
153
154	1	50				4	if ($q>2.576) {$p_value='>0.01';}
	1	0				590
	0	0				0
155	0					0	elsif ($q>1.960) {$p_value='>0.05';}
156	0					0	elsif ($q>1.645) {$p_value='>0.1';}
157							else {$p_value='<0.1';}
158
159							} # end test
160
161	1					11	return ($q,$p_value);
162							} # end sub post_hoc
163							##############
164							1;
165
166							__END__