File Coverage

blib/lib/Statistics/Test/WilcoxonRankSum.pm

Criterion	Covered	Total	%
statement	306	352	86.9
branch	63	84	75.0
condition	14	21	66.6
subroutine	42	46	91.3
pod	15	15	100.0
total	440	518	84.9

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::Test::WilcoxonRankSum;
2
3	11			11		983565	use warnings;
	11					26
	11					439
4	11			11		61	use strict;
	11					20
	11					530
5	11			11		229	use Carp;
	11					30
	11					917
6	11			11		12533	use Carp::Assert;
	11					18589
	11					76
7
8	11			11		18872	use version; our $VERSION = qv('0.0.7');
	11					37420
	11					69
9
10	11			11		17621	use Contextual::Return;
	11					227522
	11					105
11	11			11		50798	use List::Util qw(sum);
	11					30
	11					1526
12	11			11		9948	use Set::Partition;
	11					21389
	11					417
13	11			11		9736	use Math::BigFloat;
	11					150597
	11					269
14	11			11		327621	use Math::Counting ':big';
	11					84383
	11					3971
15	11			11		12988	use Statistics::Distributions;
	11					40891
	11					668
16
17	11			11		14385	use Class::Std;
	11					163191
	11					9521
18
19							{
20							############ Data ######################################################################
21
22							my %EXACT_UPTO : ATTR( :init_arg :default<20> );
23							my %dataset1_of : ATTR( :get ); # array of numbers
24							my %dataset2_of : ATTR( :get ); # array of numbers
25							my %n1_of : ATTR( :get ); # number of elements in dataset 1
26							my %n2_of : ATTR( :get ); # number of elements in dataset 2
27							my %N_of : ATTR( :get ); # overall number of elements (ranks)
28							my %MaxSum_of : ATTR( :get ); # biggest possible ranksum
29							my %ranks_of : ATTR( :get :set ); # hash with ranked data
30							my %rank_array_of : ATTR( :get ); # rank array from %ranks
31							my %rankSum1_of : ATTR( :get ); # rank sum for dataset 1
32							my %expected_rank_sum_1_of : ATTR( :get); # expected rank sum for dataset 1
33							my %expected_rank_sum_2_of : ATTR( :get); # expected rank sum for dataset 2
34							my %rankSum2_of : ATTR( :get ); # rank sum for dataset 2
35							my %smaller_rank_sum_of : ATTR;
36							my %smaller_ranks_count_of : ATTR;
37							my %expected_rank_count_for_smaller_ranks_count_of : ATTR( :get);
38							my %smaller_rank_sums_count_of : ATTR; # number of possible arrangements with lesser rank sum
39							# than the smaller rank sum
40							my %rank_sums_other_than_expected_count_of : ATTR; # number of possible arrangements with rank sum
41							# other than the smaller rank sum
42							my %probability_of : ATTR; # probability for the ranking with smaller rank sum
43							my %probability_normal_approx_of : ATTR;
44
45							############ Utility subroutines #######################################################
46
47							sub _check_dataset {
48	37			37		59	my ($dataset_ref) = @_;
49
50	37	50				115	croak "Need array ref to dataset\n"
51							unless ($dataset_ref);
52
53	37	50				125	croak "Datasets must be passed as array references\n"
54							unless (ref($dataset_ref) eq 'ARRAY');
55
56	37					58	my @dataset = grep { $_ > 0 } @{ $dataset_ref };
	283					622
	37					75
57	37	100				155	croak "dataset has no element greater 0\n" unless (@dataset);
58
59	36					154	return \@dataset;
60
61							}
62
63							sub _compute_N_MaxSum {
64	35			35		61	my ($id) = @_;
65
66	35					49	my $N;
67	35	100				104	unless ($N_of{$id}) {
68	18					45	$N = $n1_of{$id} + $n2_of{$id};
69	18					43	$N_of{$id} = $N;
70							}
71
72	35	100				92	unless ($MaxSum_of{$id}) {
73	18					78	$MaxSum_of{$id} = $N*($N+1)/2;
74							}
75
76	35	100				87	unless ($expected_rank_sum_1_of{$id}) {
77	18					51	$expected_rank_sum_1_of{$id} = $n1_of{$id}*$N/2;
78							}
79
80	35	100				96	unless ($expected_rank_sum_2_of{$id}) {
81	18					55	$expected_rank_sum_2_of{$id} = $n2_of{$id}*$N/2;
82							}
83
84	35					58	return;
85							}
86
87
88
89							sub _reset_dependant_datastructures {
90	18			18		41	my ($id) = @_;
91
92	18					169	delete $ranks_of{$id};
93	18					66	delete $rank_array_of{$id};
94	18					36	delete $rankSum1_of{$id};
95	18					32	delete $rankSum2_of{$id};
96	18					35	delete $N_of{$id};
97	18					40	delete $MaxSum_of{$id};
98	18					32	delete $smaller_rank_sum_of{$id};
99	18					33	delete $smaller_rank_sums_count_of{$id};
100	18					30	delete $probability_of{$id};
101	18					39	delete $probability_normal_approx_of{$id};
102	18					31	delete $expected_rank_sum_1_of{$id};
103	18					30	delete $expected_rank_sum_2_of{$id};
104
105	18					35	return;
106							}
107
108							sub _rank_sum_for {
109	15			15		38	my ($self, $dataset) = @_;
110
111	15					56	my $id = ident $self;
112
113	15					32	my @rank_array;
114
115	15	100	66			81	if ($rank_array_of{$id} and @{ $rank_array_of{$id} }) {
	1					4
116	1					2	@rank_array = @{ $rank_array_of{$id} };
	1					3
117							} else {
118	14					63	@rank_array = $self->compute_rank_array();
119							}
120
121	15					278	return sum map { $_->[0] } grep { $_->[1] eq $dataset } @rank_array;
	99					298
	234					658
122							}
123
124
125							sub _set_smaller_rank_for {
126	14			14		37	my ($id, $rank_sum_1, $rank_sum_2) = @_;
127	14	100				64	if ($rank_sum_1 <= $rank_sum_2) {
128	8					19	$smaller_rank_sum_of{$id} = $rank_sum_1;
129	8					20	$smaller_ranks_count_of{$id} = $n1_of{$id};
130	8					20	$expected_rank_count_for_smaller_ranks_count_of{$id} = $expected_rank_sum_1_of{$id};
131							} else {
132	6					16	$smaller_rank_sum_of{$id} = $rank_sum_2;
133	6					14	$smaller_ranks_count_of{$id} = $n2_of{$id};
134	6					15	$expected_rank_count_for_smaller_ranks_count_of{$id} = $expected_rank_sum_2_of{$id};
135							};
136	14					38	return;
137							};
138
139							sub _NormalZ { # ($Z) -> $p
140	0			0		0	my ($x) = @_;
141							#
142							# P(x) = 1 - Z(x)(b1*t+b2*t**2+b3*t**3+b4*t**4+b5*t**5)
143							# Z(x) = exp(-$x*$x/2.0)/(sqrt(2*3.14159265358979323846))
144							# t = 1/(1+p*x)
145							#
146							# Parameters
147	0					0	my @b = (0.319381530, -0.356563782, 1.781477937, -1.821255978, 1.330274429);
148	0					0	my $p = 0.2316419;
149	0					0	my $t = 1/(1+$p*$x);
150							# Initialize variables
151	0					0	my $fact = $t;
152	0					0	my $Sum;
153							# Sum polynomial
154	0					0	foreach my $bi (@b) {
155	0					0	$Sum += $bi*$fact;
156	0					0	$fact *= $t;
157							};
158							# Calculate probability
159	0					0	$p = 2*$Sum*exp(-$x*$x/2.0)/(sqrt(2*3.14159265358979323846));
160							#
161	0					0	return $p;
162							};
163
164							############ Methods ###################################################################
165
166							sub set_dataset1 {
167	0			0	1	0	my ($self, $dataset1_ref) = @_;
168
169	0					0	$dataset1_ref = _check_dataset($dataset1_ref);
170
171	0					0	my $id = ident $self;
172	0					0	$dataset1_of{$id} = $dataset1_ref;
173	0					0	$n1_of{$id} = scalar(@{ $dataset1_ref });
	0					0
174
175	0					0	_reset_dependant_datastructures($id);
176
177	0					0	return;
178							}
179
180							sub set_dataset2 {
181	1			1	1	1922	my ($self, $dataset2_ref) = @_;
182
183	1					4	$dataset2_ref = _check_dataset($dataset2_ref);
184
185	0					0	my $id = ident $self;
186	0					0	$dataset1_of{$id} = $dataset2_ref;
187	0					0	$n2_of{$id} = scalar(@{ $dataset2_ref });
	0					0
188
189	0					0	_reset_dependant_datastructures($id);
190
191	0					0	return;
192							}
193
194							sub load_data {
195	18			18	1	22894	my ($self, $dataset1_ref, $dataset2_ref) = @_;
196
197	18					78	$dataset1_ref = _check_dataset($dataset1_ref);
198	18					52	$dataset2_ref = _check_dataset($dataset2_ref);
199
200	18					79	my $id = ident $self;
201
202	18					53	$dataset1_of{$id} = $dataset1_ref;
203	18					56	$dataset2_of{$id} = $dataset2_ref;
204	18					40	$n1_of{$id} = scalar(@{ $dataset1_ref });
	18					55
205	18					32	$n2_of{$id} = scalar(@{ $dataset2_ref });
	18					48
206
207	18					66	_reset_dependant_datastructures($id);
208
209	18					55	_compute_N_MaxSum($id);
210
211	18					49	return;
212							}
213
214							sub compute_ranks {
215	18			18	1	367	my ($self) = @_;
216	18					51	my $id = ident $self;
217
218	18	100	66			177	croak "Please set/load datasets before computing ranks\n" unless ($dataset1_of{$id} and $dataset2_of{$id});
219
220	17					31	my @dataset1 = @{ $dataset1_of{$id} };
	17					69
221	17					41	my @dataset2 = @{ $dataset2_of{$id} };
	17					78
222
223							# at this point we are sure we have both data sets, so we may as well compute N and MaxSum - if not already computed
224	17					56	_compute_N_MaxSum($id);
225
226	17					28	my %ranks;
227
228	17					39	foreach my $el (@dataset1) {
229	113					519	$ranks{$el}->{in_dataset}->{ds1}++;
230							}
231	17					41	foreach my $el (@dataset2) {
232	153					598	$ranks{$el}->{in_dataset}->{ds2}++;
233							}
234
235	17					42	my $rank=0;
236	17					165	foreach my $value (sort { $a <=> $b } keys %ranks) {
	671					872
237
238	236					492	my $tied_ranks;
239
240	236					218	foreach my $ds (keys %{ $ranks{$value}->{in_dataset} }) {
	236					686
241	246					623	$tied_ranks += $ranks{$value}->{in_dataset}->{$ds};
242							}
243
244	236					672	assert $tied_ranks if DEBUG;
245
246	236					726	my $rs;
247	236					427	for my $r ($rank+1 .. $rank+$tied_ranks) {
248	266					524	$rs += $r;
249							}
250	236					509	$ranks{$value}->{rank} = $rs/$tied_ranks;
251	236					337	$ranks{$value}->{tied} = $tied_ranks;
252
253	236					343	$rank+=$tied_ranks;
254							}
255
256	17					70	$ranks_of{$id} = \%ranks;
257
258	17					195	return $ranks_of{$id};
259							}
260
261							sub compute_rank_array {
262	28			28	1	13104	my ($self) = @_;
263	28					75	my $id = ident $self;
264
265	28					65	my @rank_array;
266	28	100	66			173	if ($rank_array_of{$id} and @{ $rank_array_of{$id} } ) {
	11					55
267	11					19	@rank_array = @{ $rank_array_of{$id} };
	11					54
268							} else {
269
270	17					30	my %ranks;
271
272	17	100	66			84	if ($ranks_of{$id} and %{ $ranks_of{$id} } ) {
	2					21
273	2					3	%ranks = %{ $ranks_of{$id} };
	2					19
274							} else {
275	15					30	%ranks = %{ $self->compute_ranks() };
	15					88
276							}
277
278	17					131	foreach my $value (sort { $a <=> $b } keys %ranks) {
	675					807
279	236					286	foreach my $ds (keys %{ $ranks{$value}->{in_dataset} }) {
	236					528
280	246					490	for (1..$ranks{$value}->{in_dataset}->{$ds}) {
281	266					1082	push(@rank_array, [ $ranks{$value}->{rank}, $ds ]);
282							}
283							}
284							}
285
286	17					94	$rank_array_of{$id} = \@rank_array;
287
288							}
289
290							return (
291	1			1		266	SCALAR { scalar @rank_array } # How many?
292	27			27		12575	LIST { @rank_array } # What are they?
293	28					286	);
294							}
295
296							sub rank_sum_for {
297	15			15	1	38	my ($self, $for_dataset) = @_;
298
299	15					56	my $id = ident $self;
300
301	15					22	my $rankSum;
302	15	100				89	if ($for_dataset =~ m{1}) {
		50
303	14	50				55	if ($rankSum1_of{$id}) {
304	0					0	return $rankSum1_of{$id};
305							} else {
306	14					57	$rankSum1_of{$id} = $self->_rank_sum_for('ds1');
307	14					221	return $rankSum1_of{$id};
308							}
309							} elsif ($for_dataset =~ m{2}) {
310	1	50				5	if ($rankSum2_of{$id}) {
311	0					0	return $rankSum2_of{$id};
312							} else {
313	1					3	$rankSum2_of{$id} = $self->_rank_sum_for('ds2');
314	1					4	return $rankSum2_of{$id};
315							}
316							} else {
317	0					0	croak "Argument must match `1' or `2' (meaning dataset 1 or 2)\n";
318							}
319
320	0					0	return;
321
322							}
323
324							sub get_smaller_rank_sum {
325	32			32	1	1308	my ($self) = @_;
326
327	32					86	my $id = ident $self;
328
329	32	100	66			220	if ($smaller_rank_sum_of{$id} and $smaller_ranks_count_of{$id}) {
330
331							return (
332	0			0		0	SCALAR { $smaller_rank_sum_of{$id} } # only the rank sum itselt
333	18			18		4720	LIST { ($smaller_rank_sum_of{$id}, $smaller_ranks_count_of{$id} ) } # also the size of the corresponding ds
334	18					160	);
335							}
336
337	14					43	my $rank_sum_1 = $rankSum1_of{$id};
338	14					64	my $rank_sum_2 = $rankSum2_of{$id};
339
340	14	50	66			92	if (not($rank_sum_1) and not($rank_sum_2)) {
341	13					50	$rank_sum_1 = $self->rank_sum_for('ds1');
342							}
343
344
345	14	100	66			207	if ($rank_sum_1 and $rank_sum_2) {
		50
		0
346
347	1					11	_set_smaller_rank_for($id, $rank_sum_1, $rank_sum_2);
348
349							} elsif ($rank_sum_1) {
350	13					40	$rank_sum_2 = $MaxSum_of{$id} - $rank_sum_1;
351	13					28	$rankSum2_of{$id} = $rank_sum_2;
352
353	13					88	_set_smaller_rank_for($id, $rank_sum_1, $rank_sum_2);
354
355							} elsif ($rank_sum_2) {
356	0					0	$rank_sum_1 = $MaxSum_of{$id} - $rank_sum_2;
357	0					0	$rankSum1_of{$id} = $rank_sum_1;
358
359	0					0	_set_smaller_rank_for($id, $rank_sum_1, $rank_sum_2);
360
361							}
362
363							return (
364	2			2		449	SCALAR { $smaller_rank_sum_of{$id} } # only the rank sum itselt
365	12			12		2217	LIST { ($smaller_rank_sum_of{$id}, $smaller_ranks_count_of{$id} ) } # also the size of the corresponding ds
366	14					112	);
367
368
369	0					0	return $smaller_rank_sum_of{$id};
370							}
371
372							sub smaller_rank_sums_count {
373	1			1	1	5	my ($self) = @_;
374	1					4	my $id = ident $self;
375
376	1	50				4	if ($smaller_rank_sums_count_of{$id}) {
377	0					0	return $smaller_rank_sums_count_of{$id};
378							};
379
380	1					5	my ($W, $nA) = $self->get_smaller_rank_sum();
381	1					10	my $N = $N_of{$id};
382	1					13	my $nB = $N - $nA;
383	1					2	my $MaxSum = $MaxSum_of{$id};
384
385	1					3	my @ranks = map { $_->[0] } $self->compute_rank_array();
	10					23
386
387							# let's do some checks before starting the big counting
388	1	50				4	if ($W > $MaxSum) { croak "Rank sum bound $W is bigger than the maximum possible rank sum $MaxSum\n" };
	0					0
389	1	50				18	if ($N != scalar(@ranks))
390	0					0	{ croak "Sum of $nA and $nB must be equal to number of ranks: ".scalar(@ranks)."\n" };
391
392							# compute all possible partitions
393	1					13	my $s = Set::Partition->new(
394							list => \@ranks,
395							partition => [$nA, $nB],
396							);
397
398	1					32	my $count_less_W = 0;
399
400	1					6	while (my $p = $s->next()) {
401	252					15785	my @pA = @{ $p->[0] };
	252					609
402	252					1098	my $sumA = sum @pA;
403	252	100				2871	if ($sumA <= $W) {
404	64					308	$count_less_W++;
405							}
406							}
407
408	1					61	return $count_less_W;
409
410							};
411
412							sub rank_sums_other_than_expected_counts {
413	9			9	1	20	my ($self) = @_;
414	9					28	my $id = ident $self;
415
416	9	50				44	if ($rank_sums_other_than_expected_count_of{$id}) {
417	0					0	return $rank_sums_other_than_expected_count_of{$id};
418							};
419
420	9					31	my ($W, $nA) = $self->get_smaller_rank_sum();
421	9					100	my $W_exp = $self->get_expected_rank_count_for_smaller_ranks_count();
422
423	9					196	my $N = $N_of{$id};
424	9					19	my $nB = $N - $nA;
425	9					17	my $MaxSum = $MaxSum_of{$id};
426
427	9					34	my @ranks = map { $_->[0] } $self->compute_rank_array();
	125					1165
428
429							# let's do some checks before starting the big counting
430	9	50				45	if ($W > $MaxSum) { croak "Rank sum bound $W is bigger than the maximum possible rank sum $MaxSum\n" };
	0					0
431	9	50				176	if ($N != scalar(@ranks))
432	0					0	{ croak "Sum of $nA and $nB must be equal to number of ranks: ".scalar(@ranks)."\n" };
433
434							# compute all possible partitions
435	9					85	my $s = Set::Partition->new(
436							list => \@ranks,
437							partition => [$nA, $nB],
438							);
439
440	9					334	my $count_other_W = 0;
441
442	9	100				32	if ($W >= $W_exp) {
443
444	2					10	while (my $p = $s->next()) {
445	235					11256	my @pA = @{ $p->[0] };
	235					485
446	235					454	my $sumA = sum @pA;
447	235	100				1012	if ($sumA >= $W) {
448	40					160	$count_other_W++;
449							}
450							}
451
452							} else {
453
454	7					36	while (my $p = $s->next()) {
455	401753					32418958	my @pA = @{ $p->[0] };
	401753					1211295
456	401753					1062066	my $sumA = sum @pA;
457	401753	100				2300053	if ($sumA <= $W) {
458	17476					87277	$count_other_W++;
459							}
460							}
461
462							}
463
464	9					731	return $count_other_W;
465
466							};
467
468
469							sub probability :NUMERIFY {
470	6			6	1	31	my ($self) = @_;
471	6					20	my $id = ident $self;
472
473	6	50				25	if ($probability_of{$id}) {
474	0					0	return $probability_of{$id};
475							}
476
477	6					28	my ($W, $nA) = $self->get_smaller_rank_sum();
478	6					55	my $N = $N_of{$id};
479
480	6					79	my $p;
481	6	100				21	if ($N <= $EXACT_UPTO{$id}) {
482	5					21	$p = $self->probability_exact();
483							} else {
484	1					5	$p = $self->probability_normal_approx();
485							}
486
487	6					19	$probability_of{$id} = $p;
488
489	6					36	return $probability_of{$id};
490	11			11		47805	}
	11					25
	11					77
491
492							sub probability_exact {
493	9			9	1	4681	my ($self) = @_;
494	9					34	my $id = ident $self;
495
496	9					32	my ($W, $nA) = $self->get_smaller_rank_sum();
497	9					108	my $N = $N_of{$id};
498
499	9					176	my $partition_count = bcomb($N, $nA);
500	9					9120	my $have_smaller_rank_sums = $self->rank_sums_other_than_expected_counts();
501	9					97	my $p = Math::BigFloat->new($have_smaller_rank_sums) * 2.0 / Math::BigFloat->new($partition_count);
502
503	9	100				11804	if ($p > 1) { $p = 1 };
	1					326
504
505	9					1918	return $p;
506							}
507
508							sub probability_normal_approx {
509	5			5	1	2214	my ($self) = @_;
510	5					45	my $id = ident $self;
511
512	5					21	my ($W, $nA) = $self->get_smaller_rank_sum();
513	5					49	my $N = $N_of{$id};
514	5					70	my $nB = $N - $nA;
515	5					17	my $mean = $nA*($N+1)/2;
516	5					18	my $deviation = sqrt($nA*$nB*($N+1)/12.0);
517	5	100				19	my $continuity = (($W - $mean) >= 0) ? -0.5 : +0.5;
518	5					11	my $z = ($W - $mean + $continuity)/$deviation;
519	5					12	@{ $probability_normal_approx_of{$id} }{'mean', 'std deviation', 'z'} = ($mean, $deviation, $z);
	5					35
520
521	5					44	my $p = 2*Statistics::Distributions::uprob(abs($z));
522
523	5					364	return $p;
524
525							}
526
527
528							sub probability_status {
529	4			4	1	3538	my ($self) = (@_);
530	4					11	my $id = ident $self;
531
532	4					6	my $return_string;
533	4	50				17	if ($probability_of{$id}) {
534	4	100				69	if ($probability_normal_approx_of{$id}) {
535	2					5	$return_string = sprintf "Probability: %10f, normal approx w. mean: %10f, std deviation: %10f, z: %10f", $probability_of{$id}, map { $probability_normal_approx_of{$id}->{$_} } ('mean', 'std deviation', 'z');
	6					44
536							} else {
537	2					10	$return_string = sprintf "Probability: %10f, exact", $probability_of{$id};
538							}
539							} else {
540	0					0	$return_string = "Probability not yet computed";
541							}
542
543							return (
544	4			4		493	STR { "$return_string" }
545	0			0			VOID { print $return_string."\n" }
546	4					137	);
547							}
548
549							sub as_hash :HASHIFY {
550	2			2	1	3	my ($self) = @_;
551	2					5	my $id = ident $self;
552
553							return {
554	2					37	dataset_1 => $dataset1_of{$id},
555							dataset_2 => $dataset2_of{$id},
556							n1 => $n1_of{$id},
557							n2 => $n2_of{$id},
558							N => $N_of{$id},
559							rank_array => $rank_array_of{$id},
560							rank_sum_1 => $rankSum1_of{$id},
561							rank_sum_2 => $rankSum2_of{$id},
562							rank_sum_1_expected => $expected_rank_sum_1_of{$id},
563							rank_sum_2_expected => $expected_rank_sum_2_of{$id},
564							probability => $probability_of{$id},
565							probability_normal_approx => $probability_normal_approx_of{$id},
566							};
567
568	11			11		12366	}
	11					32
	11					55
569
570
571							sub summary :STRINGIFY {
572	2			2	1	1110	my ($self) = (@_);
573	2					7	my $id = ident $self;
574
575	2					7	my $hash = $self->as_hash();
576
577	2					3	my $return_string;
578	2	50				17	if (not($hash->{dataset_1})) {
		50
579	0					0	$return_string = "Dataset 1 is not yet initialised, no computations could be done\n";
580							} elsif (not($hash->{dataset_2})) {
581	0					0	$return_string = "Dataset 2 is not yet initialised, no computations could be done\n";
582							} else {
583	2					5	my $format = <<END_FORMAT;
584							----------------------------------------------------------------
585							dataset | n | rank sum: observed / expected
586							----------------------------------------------------------------
587							1 |%7d | %7d /%7d
588							----------------------------------------------------------------
589							2 |%7d | %7d /%7d
590							----------------------------------------------------------------
591							N (size of both datasets): %7d
592							%s
593							END_FORMAT
594	2					5	my $prob = $self->probability_status();
595	2					285	$return_string = sprintf $format, @{ $hash }{'n1', 'rank_sum_1', 'rank_sum_1_expected', 'n2', 'rank_sum_2', 'rank_sum_2_expected', 'N'}, $prob;
	2					22
596	2	100				35	if ($hash->{probability} >= 0.05) {
597	1					3	$return_string.="Not significant (at 0.05 level)\n";
598							} else {
599	1					283	$return_string.="Significant (at 0.05 level)\n";
600	1	50				6	$return_string.= $hash->{rank_sum_1} > $hash->{rank_sum_1_expected} ?
601							"Ranks of dataset 1 are higher than expected\n"
602							: "Ranks of dataset 1 are lower than expected\n";
603
604							}
605	2	50				18	if ($hash->{N} < 5) {
606	0					0	$return_string.="Warning: sample size ($hash->{N}) too small (<5)!\n";
607							}
608							}
609
610							return (
611	1			1		260	STR { "$return_string" }
612	1			1		838	VOID { print $return_string }
613	2					36	);
614	11			11		7962	}
	11					23
	11					53
615
616							}
617
618							1; # Magic true value required at end of module
619
620
621							__END__