File Coverage

blib/lib/Lingua/JA/Categorize/Categorizer.pm

Criterion	Covered	Total	%
statement	34	80	42.5
branch	0	2	0.0
condition	0	3	0.0
subroutine	11	16	68.7
pod	4	4	100.0
total	49	105	46.6

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::JA::Categorize::Categorizer;
2	1			1		588	use strict;
	1					2
	1					34
3	1			1		5	use warnings;
	1					1
	1					25
4	1			1		1483	use Algorithm::NaiveBayes;
	1					5720
	1					26
5	1			1		589	use Lingua::JA::Categorize::Result;
	1					4
	1					16
6	1			1		37	use base qw( Lingua::JA::Categorize::Base );
	1					2
	1					125
7
8							__PACKAGE__->mk_accessors($_) for qw( brain );
9
10							sub import {
11	1			1		12094	use Algorithm::NaiveBayes::Util;
	1					865
	1					68
12	1			1		7	use List::Util qw(min max sum);
	1					3
	1					69
13	1			1		6	no warnings 'redefine';
	1					2
	1					501
14							*Algorithm::NaiveBayes::Util::rescale = sub {
15	0			0		0	my ($scores) = @_;
16	0					0	my $min = min( values %$scores );
17	0					0	my $sum = sum( values %$scores );
18	0					0	$sum -= $min * ( keys %$scores );
19	0					0	for ( sort { $scores->{$b} <=> $scores->{$a} } keys %$scores ) {
	0					0
20	0					0	$scores->{$_} = ( $scores->{$_} - $min ) / $sum;
21							}
22	0					0	my $max = max( values %$scores );
23	0					0	for ( sort { $scores->{$b} <=> $scores->{$a} } keys %$scores ) {
	0					0
24	0					0	$scores->{$_} = sprintf( "%0.2f", $scores->{$_} / $max );
25							}
26	0					0	return $scores;
27	1			1		39	};
28							}
29
30							sub new {
31	1			1	1	14	my $class = shift;
32	1					14	my $self = $class->SUPER::new(@_);
33	1					10	$self->brain( Algorithm::NaiveBayes->new( purge => 0 ) );
34							#use Devel::Size qw(size total_size);
35							#use Devel::Peek;
36							{
37	1			1		18	no warnings 'redefine';
	1					3
	1					692
	1					1913
38							*Algorithm::NaiveBayes::Model::Frequency::do_predict = sub {
39	0			0		0	my ( $self, $m, $newattrs ) = @_;
40							# print "IN :", total_size($m), "\n";
41	0					0	my %scores = %{ $m->{prior_probs} };
	0					0
42	0					0	while ( my ( $feature, $value ) = each %$newattrs ) {
43	0	0				0	unless ( exists $m->{attributes}{$feature} ) {
44	0					0	push( @{ $self->{no_match_features} }, $feature );
	0					0
45	0					0	next;
46							}
47							else {
48	0					0	push( @{ $self->{match_features} }, $feature );
	0					0
49							}
50	0					0	while ( my ( $label, $attributes ) = each %{ $m->{probs} } ) {
	0					0
51	0		0			0	my $p = ($attributes->{$feature} \|\| $m->{smoother}{$label});
52	0					0	$scores{$label} += $p * $value;
53
54							#$scores{$label} +=
55							# ( $attributes->{$feature} \|\| $m->{smoother}->{$label} ) *
56							# $value;
57							}
58							}
59							# print "OUT:", total_size($m), "\n";
60	0					0	Algorithm::NaiveBayes::Util::rescale( \%scores );
61
62	0					0	return \%scores;
63	1					23	};
64							}
65	1					5	return $self;
66							}
67
68							sub categorize {
69	0			0	1		my $self = shift;
70	0						my $word_set = shift;
71	0						my $user_extention = shift;
72	0						$self->brain->{no_match_features} = [];
73	0						$self->brain->{match_features} = [];
74	0						my $score = $self->brain->predict( attributes => $word_set );
75	0						my $no_matches = $self->brain->{no_match_features};
76	0						my $matches = $self->brain->{match_features};
77	0						my $result = Lingua::JA::Categorize::Result->new(
78							context => $self->context,
79							score => $score,
80							matches => $matches,
81							no_matches => $no_matches,
82							word_set => $word_set,
83							user_extention => $user_extention,
84							);
85	0						return $result;
86							}
87
88							sub save {
89	0			0	1		my $self = shift;
90	0						my $save_file = shift;
91	0						$self->brain->save_state($save_file);
92							}
93
94							sub load {
95	0			0	1		my $self = shift;
96	0						my $save_file = shift;
97	0						my $brain = $self->brain;
98	0						$brain = Algorithm::NaiveBayes->restore_state($save_file);
99	0						$self->brain($brain);
100							}
101
102							1;
103							__END__