File Coverage

blib/lib/Text/NumericData/Stat.pm

Criterion	Covered	Total	%
statement	42	43	97.6
branch	7	12	58.3
condition			n/a
subroutine	3	3	100.0
pod	0	1	0.0
total	52	59	88.1

line	stmt	bran	sub	pod	time	code
1						package Text::NumericData::Stat;
2
3						# TODO: Integrate into Text::NumericData::File already.
4						# Also, return something for single data sets; just set non-computable measures to something invalid.
5
6	2		2		1584	use Text::NumericData::File;
	2				6
	2				62
7
8	2		2		12	use strict;
	2				4
	2				997
9
10						# This is just a placeholder because of a past build system bug.
11						# The one and only version for Text::NumericData is kept in
12						# the Text::NumericData module itself.
13						our $VERSION = '1';
14						$VERSION = eval $VERSION;
15
16						# Generate a statistics file out of given input file.
17						# Gives and takes Text::NumericData::File objects.
18						# This could be a method of Text::NumericData::File, but it doesn't have to.
19						sub generate
20						{
21	1		1	0	3	my $in = shift;
22	1				4	my $out = Text::NumericData::File->new($in->{config});
23	1				8	my @mean; # arithmetic mean
24						my @error; # standard error, sqrt(mean sq. error / N-1)
25	1				0	my @min; # minimal value
26	1				0	my @max; # maximal value
27
28	1				4	$out->{title} = 'Statistics';
29	1	50			14	$out->{title} .= ' of '.$in->{title} if defined $in->{title};
30	1				7	$out->{titles} = ['column', 'name', 'mean', 'stderr', 'min', 'max'];
31
32	1				2	my $N = @{$in->{data}};
	1				3
33	1	50			3	return $out if $N < 1;
34	1				2	my $S = @{$in->{data}[0]};
	1				25
35
36	1				5	for my $i (0 .. $S-1)
37						{
38	2				23	$max[$i] = $min[$i] = $mean[$i] = $in->{data}[0][$i];
39						}
40	1				10	for(my $j=1; $j<@{$in->{data}}; ++$j)
	10				21
41						{
42	9				15	my $d = $in->{data}[$j];
43	9				17	for my $i (0 .. $S-1)
44						{
45	18				25	$mean[$i] += $d->[$i];
46	18	50			35	$min[$i] = $d->[$i] if $d->[$i] < $min[$i];
47	18	100			36	$max[$i] = $d->[$i] if $d->[$i] > $max[$i];
48						}
49						}
50	1				10	for my $i (0 .. $S-1)
51						{
52	2				5	$mean[$i] /= $N;
53						}
54	1				1	for my $d (@{$in->{data}})
	1				2
55						{
56	10				15	for my $i (0 .. $S-1)
57						{
58	20				39	$error[$i] += ($d->[$i]-$mean[$i])**2;
59						}
60						}
61	1				5	for my $i (0 .. $S-1)
62						{
63	2	50			10	$error[$i] = $N > 1 ? sqrt($error[$i]/($N-1)) : 0;
64						}
65	1				3	for my $s (1 .. $S)
66						{
67	2				6	my $name = $in->{titles}[$s-1];
68	2	50			4	if(defined $name)
69						{
70	0				0	$out->filter_text($name); # play safe since there is no quoting in data (yet?)
71						}
72						else
73						{
74	2				8	$name = "col$s";
75						}
76	2				13	$out->{data}[$s-1] = [ $s, $name, $mean[$s-1], $error[$s-1], $min[$s-1], $max[$s-1] ];
77						}
78	1				4	return $out;
79						}
80
81						1;
82
83						__END__