package Algorithm::NaiveBayes::Model::Discrete;

use strict;
use base qw(Algorithm::NaiveBayes);
use Algorithm::NaiveBayes::Util qw(rescale);

sub do_add_instance {
  my ($self, $attributes, $labels, $data) = @_;

  foreach my $label ( @$labels ) {
    # Tally one more training instance for this label, then count how
    # often each attribute takes each discrete value under this label.
    my $mylabel = $data->{labels}{$label} ||= {};
    $mylabel->{count}++;
    while (my ($attr, $value) = each %$attributes) {
      $mylabel->{attrs}{$attr}{$value}++;
    }
  }
}
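# Illustration only (not part of the original module): after two calls like
#   $self->do_add_instance({color => 'red', size => 'big'},   ['positive'], $data);
#   $self->do_add_instance({color => 'red', size => 'small'}, ['positive'], $data);
# $data->{labels} holds nested counts shaped like:
#   { positive => { count => 2,
#                   attrs => { color => { red => 2 },
#                              size  => { big => 1, small => 1 } } } }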
|
sub do_train {
  my ($self, $training_data) = @_;
  my $m = {};

  my $instances = $self->instances;
  my $labels = $training_data->{labels};
  my $probs = $m->{probs} = {};

  # Calculate the log-probabilities for each category
  foreach my $label ($self->labels) {
    # Prior: log P(label) = log( count(label) / total instances )
    $m->{prior_probs}{$label} = log($labels->{$label}{count} / $instances);

    # Conditionals: log P(attr=value | label) = log(count) - log(count(label))
    my $denominator = log($labels->{$label}{count});
    while (my ($attribute, $values) = each %{ $labels->{$label}{attrs} }) {
      while (my ($value, $count) = each %$values) {
        $probs->{$attribute}{$label}{$value} = log($count) - $denominator;
      }
    }
  }

  return $m;
}
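# Worked example with made-up numbers: given 200 training instances, 100 of
# them labeled 'spam', and 40 'spam' instances where length = 'long':
#   $m->{prior_probs}{spam}          == log(100/200)       == -0.693
#   $m->{probs}{length}{spam}{long}  == log(40) - log(100) == -0.916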
|
sub do_predict {
  my ($self, $m, $newattrs) = @_;

  # Note that we're using the log(prob) here.  That's why we add instead of multiply.

  my %scores = %{$m->{prior_probs}};
  while (my ($feature, $value) = each %$newattrs) {
    next unless exists $m->{probs}{$feature}; # Ignore totally unseen features
    while (my ($label, $values) = each %{$m->{probs}{$feature}}) {
      # A value never seen with this label adds nothing to the score.
      $scores{$label} += ($values->{$value} || 0);
    }
  }

  rescale \%scores;
  return \%scores;
}
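# Continuing the made-up example above: predicting {length => 'long'} scores
# 'spam' as prior + conditional, i.e. log P(spam) + log P(length=long | spam):
#   $scores{spam} == -0.693 + (-0.916) == -1.609   # log(0.5 * 0.4) = log(0.2)
# rescale() (from Algorithm::NaiveBayes::Util) then converts the log scores
# into scores between 0 and 1 for the caller.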
|
1; |
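
# A minimal usage sketch, not shipped with this file. It assumes the
# Algorithm::NaiveBayes front-end dispatches to this class via its
# model_type option, as the class name suggests; the attributes and
# labels below are invented.
#
#   use Algorithm::NaiveBayes;
#   my $nb = Algorithm::NaiveBayes->new(model_type => 'Discrete');
#   $nb->add_instance(attributes => {color => 'red',  size => 'big'},
#                     label      => 'positive');
#   $nb->add_instance(attributes => {color => 'blue', size => 'small'},
#                     label      => 'negative');
#   $nb->train;
#   my $scores = $nb->predict(attributes => {color => 'red', size => 'small'});
#   # $scores: hashref mapping each label to its rescaled score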