line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
1
|
|
|
1
|
|
5526
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
76
|
|
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Statistics::Contingency; |
4
|
|
|
|
|
|
|
{ |
5
|
|
|
|
|
|
|
$Statistics::Contingency::VERSION = '0.09'; |
6
|
|
|
|
|
|
|
} |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Correct=Y Correct=N |
9
|
|
|
|
|
|
|
# +-----------+-----------+ |
10
|
|
|
|
|
|
|
# Assigned=Y | a | b | |
11
|
|
|
|
|
|
|
# +-----------+-----------+ |
12
|
|
|
|
|
|
|
# Assigned=N | c | d | |
13
|
|
|
|
|
|
|
# +-----------+-----------+ |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# accuracy = (a+d)/(a+b+c+d) |
16
|
|
|
|
|
|
|
# precision = a/(a+b) |
17
|
|
|
|
|
|
|
# recall = a/(a+c) |
18
|
|
|
|
|
|
|
# F1 = 2a/(2a + b + c) |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Edge cases: |
21
|
|
|
|
|
|
|
# precision(0,0,+,d) = 0 |
22
|
|
|
|
|
|
|
# precision(a,0,c,d) = 1 |
23
|
|
|
|
|
|
|
# precision(0,+,c,d) = 0 |
24
|
|
|
|
|
|
|
# recall(a,b,0,d) = 1 |
25
|
|
|
|
|
|
|
# recall(0,b,+,d) = 0 |
26
|
|
|
|
|
|
|
# F1(a,0,0,d) = 1 |
27
|
|
|
|
|
|
|
# F1(0,b,c,d) = 0 when b+c > 0 |
28
|
|
|
|
|
|
|
|
29
|
1
|
|
|
1
|
|
1024
|
use Params::Validate qw(:all); |
|
1
|
|
|
|
|
10178
|
|
|
1
|
|
|
|
|
1698
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Constructor.
#
# Named arguments (checked with Params::Validate's validate()):
#   verbose    - scalar, defaults to 0; add_result() prints a trace line
#                when it is true
#   categories - array ref of category names, or hash ref whose keys are
#                the category names (required)
#
# Returns a blessed hash holding the overall a/b/c/d counts (all zero) and
# a 'categories' table mapping each name to its own zeroed a/b/c/d counts.
sub new {
    my $package = shift;

    my %args = validate(@_, {
        verbose    => { type => SCALAR, default => 0 },
        categories => { type => ARRAYREF|HASHREF },
    });

    # The caller's category list is replaced by a table keyed on name.
    my $given = delete $args{categories};
    my @names = UNIVERSAL::isa($given, 'HASH') ? keys(%$given) : @$given;

    my $self = bless { %args }, $package;
    $self->{$_} = 0 for qw(a b c d);
    $self->{categories} = {
        map { ($_ => { a => 0, b => 0, c => 0, d => 0 }) } @names
    };

    return $self;
}
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Overwrites the four overall contingency counts in one call.
# The arguments after the invocant are taken positionally as a, b, c, d.
# The per-category tables are left untouched.
sub set_entries {
    my ($self, @counts) = @_;
    @$self{qw(a b c d)} = @counts;
}
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Records one classification result.
#
#   $assigned - categories the classifier assigned
#   $correct  - categories that are actually correct
#   $name     - label used only in the verbose trace line
#
# $assigned and $correct may each be a single category name, an array ref
# of names, or a hash ref (a category counts as a member when its value is
# true).  Dies on any other kind of reference.
#
# For every known category exactly one of the a/b/c/d cells is incremented,
# both in that category's table and in the overall counts.
sub add_result {
    my ($self, $assigned, $correct, $name) = @_;
    my $per_category = $self->{categories};

    # Normalise both lists to hash refs so membership is a plain lookup.
    for my $list ($assigned, $correct) {
        if (!ref $list) {
            $list = { $list => 1 };
        }
        elsif (UNIVERSAL::isa($list, 'HASH')) {
            # Already in the form we need.
        }
        elsif (UNIVERSAL::isa($list, 'ARRAY')) {
            $list = { map { ($_ => 1) } @$list };
        }
        else {
            die "Unknown type '$list' for category list";
        }
    }

    # $cell_for[assigned?][correct?] names the contingency cell to bump.
    my @cell_for = ( [ 'd', 'c' ], [ 'b', 'a' ] );
    for my $cat (keys %$per_category) {
        my $was_assigned = $assigned->{$cat} ? 1 : 0;
        my $is_correct   = $correct->{$cat}  ? 1 : 0;
        my $cell         = $cell_for[$was_assigned][$is_correct];

        $per_category->{$cat}{$cell}++;
        $self->{$cell}++;
    }

    print "$name: assigned=(@{[ keys %$assigned ]}) correct=(@{[ keys %$correct ]})\n"
        if $self->{verbose};

    # The cached macro averages are stale once the counts have changed.
    delete $self->{macro};

    return $self->{hypotheses}++;
}
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Computes x / (x + y) without ever dividing by zero:
#   y false          -> 1
#   y true, x false  -> 0
#   otherwise        -> 1 / (1 + y/x)
# The invocant is not used.
sub _invert {
    my (undef, $x, $y) = @_;

    if (!$y) {
        return 1;
    }
    if (!$x) {
        return 0;
    }
    return 1 / (1 + $y / $x);
}
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# accuracy = (a + d) / (a + b + c + d) for the counts in the hash ref passed
# as the second argument.  A table whose four cells are all false yields 1.
sub _accuracy {
    my (undef, $counts) = @_;

    my @filled = grep { $counts->{$_} } qw(a b c d);
    return 1 if !@filled;

    my $agreed = $counts->{a} + $counts->{d};
    my $total  = $counts->{a} + $counts->{b} + $counts->{c} + $counts->{d};
    return $agreed / $total;
}
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# error = (b + c) / (a + b + c + d) for the counts in the hash ref passed as
# the second argument.  A table whose four cells are all false yields 0.
sub _error {
    my (undef, $counts) = @_;

    my @filled = grep { $counts->{$_} } qw(a b c d);
    return 0 if !@filled;

    my $disagreed = $counts->{b} + $counts->{c};
    my $total     = $counts->{a} + $counts->{b} + $counts->{c} + $counts->{d};
    return $disagreed / $total;
}
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# precision = a / (a + b), delegated to _invert(a, b).
# Special case: when nothing was assigned (a and b both false) but c is
# true, the result is 0 rather than the 1 that _invert would give.
sub _precision {
    my ($self, $counts) = @_;

    my $nothing_assigned = !$counts->{a} && !$counts->{b};
    return 0 if $nothing_assigned && $counts->{c};

    return $self->_invert($counts->{a}, $counts->{b});
}
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# recall = a / (a + c), delegated to _invert(a, c).
sub _recall {
    my ($self, $counts) = @_;
    my ($found, $missed) = @$counts{qw(a c)};
    return $self->_invert($found, $missed);
}
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# F1 = 2a / (2a + b + c), delegated to _invert(2a, b + c).
sub _F1 {
    my ($self, $counts) = @_;
    my ($hits, $false_pos, $false_neg) = @$counts{qw(a b c)};
    return $self->_invert(2 * $hits, $false_pos + $false_neg);
}
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# Computes precision, recall, F1, accuracy and error for every category,
# stores each value in that category's own hash, and returns a hash ref of
# the per-metric averages over all categories.
#
# The averages are cached in $self->{macro} and returned directly while the
# cache is present.  Dies if the category table is empty.
sub _macro_stats {
    my ($self) = @_;
    return $self->{macro} if $self->{macro};

    my $cats  = $self->{categories};
    my @names = keys %$cats;
    die "No category information has been recorded"
        unless @names;

    my @metrics = qw(precision recall F1 accuracy error);

    my %sum;
    for my $cat (@names) {
        my $scores = $cats->{$cat};
        for my $metric (@metrics) {
            my $method = "_$metric";
            my $value  = $self->$method($scores);
            $scores->{$metric} = $value;
            $sum{$metric} += $value;
        }
    }

    my %average = map { ($_ => $sum{$_} / @names) } @metrics;
    return $self->{macro} = \%average;
}
141
|
|
|
|
|
|
|
|
142
|
0
|
|
|
0
|
1
|
0
|
# Micro-averaged statistics: each one applies the matching private formula
# to the object's own overall a/b/c/d counts (the object is passed as the
# counts hash).
sub micro_accuracy  { my ($self) = @_; return $self->_accuracy($self);  }
sub micro_error     { my ($self) = @_; return $self->_error($self);     }
sub micro_precision { my ($self) = @_; return $self->_precision($self); }
sub micro_recall    { my ($self) = @_; return $self->_recall($self);    }
sub micro_F1        { my ($self) = @_; return $self->_F1($self);        }
147
|
|
|
|
|
|
|
|
148
|
0
|
|
|
0
|
1
|
0
|
# Macro-averaged statistics: each one returns the matching entry of the
# hash ref produced by _macro_stats().
sub macro_accuracy  { my ($self) = @_; return $self->_macro_stats->{accuracy};  }
sub macro_error     { my ($self) = @_; return $self->_macro_stats->{error};     }
sub macro_precision { my ($self) = @_; return $self->_macro_stats->{precision}; }
sub macro_recall    { my ($self) = @_; return $self->_macro_stats->{recall};    }
sub macro_F1        { my ($self) = @_; return $self->_macro_stats->{F1};        }
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Returns the per-category table (category name => hash of counts and
# metrics).  _macro_stats() is called first, for its side effect of writing
# the metric values into each category's hash; its return value is ignored.
sub category_stats {
    my ($self) = @_;

    $self->_macro_stats;

    my $table = $self->{categories};
    return $table;
}
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Renders the seven summary statistics as a four-line ASCII table:
# border, column labels, values, border.
#
#   $figs - precision passed to _sig_figs() for every value; defaults to 3
#           when omitted or false
#
# Columns, in order: macro recall, macro precision, macro F1, micro recall,
# micro precision, micro F1, micro error.  The macro, micro and error groups
# are separated by two spaces, columns within a group by one.
#
# Returns the table as a single newline-terminated string.
sub stats_table {
    my $self = shift;
    my $figs = shift || 3;

    my @data = map { $self->_sig_figs($_, $figs) } (
        $self->macro_recall,
        $self->macro_precision,
        $self->macro_F1,
        $self->micro_recall,
        $self->micro_precision,
        $self->micro_F1,
        $self->micro_error,
    );

    # Column width is the widest value, but never less than the widest label
    # (4 characters), so short values cannot push the header out of line.
    my $width = 4;
    for my $cell (@data) {
        $width = length($cell) if length($cell) > $width;
    }

    # Header and values share one format, so their columns always line up.
    my $row_format =
        "| %${width}s %${width}s %${width}s  %${width}s %${width}s %${width}s  %${width}s |\n";
    my $header = sprintf $row_format, qw(maR maP maF1 miR miP miF1 Err);
    my $values = sprintf $row_format, @data;

    # The border is measured from the rendered row (minus the two corner
    # characters and the newline) so it can never disagree with the rows.
    my $border = "+" . ("-" x (length($header) - 3)) . "+\n";

    return $border . $header . $values . $border;
}
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Formats $number in fixed-point notation.  The number of digits after the
# decimal point is $figs minus the integer part of log10(|$number|), so
# smaller magnitudes get more decimals and larger ones fewer.
#
#   $number - value to format (zero is treated as magnitude 0)
#   $figs   - base number of decimals
#
# The magnitude is taken from the absolute value so negative numbers do not
# reach log(), and the decimal count is clamped at zero so large numbers do
# not produce an invalid "%.-Nf" format.  The invocant is not used.
sub _sig_figs {
    my ($self, $number, $figs) = @_;

    my $magnitude = $number != 0 ? int(log(abs($number)) / log(10)) : 0;

    my $after_point = $figs - $magnitude;
    $after_point = 0 if $after_point < 0;

    return sprintf "%.${after_point}f", $number;
}
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
__END__ |