File Coverage

blib/lib/Lingua/JA/Categorize/Categorizer.pm
Criterion Covered Total %
statement 34 80 42.5
branch 0 2 0.0
condition 0 3 0.0
subroutine 11 16 68.7
pod 4 4 100.0
total 49 105 46.6


line stmt bran cond sub pod time code
1             package Lingua::JA::Categorize::Categorizer;
2 1     1   588 use strict;
  1         2  
  1         34  
3 1     1   5 use warnings;
  1         1  
  1         25  
4 1     1   1483 use Algorithm::NaiveBayes;
  1         5720  
  1         26  
5 1     1   589 use Lingua::JA::Categorize::Result;
  1         4  
  1         16  
6 1     1   37 use base qw( Lingua::JA::Categorize::Base );
  1         2  
  1         125  
7              
8             __PACKAGE__->mk_accessors($_) for qw( brain );
9              
10             sub import {
11 1     1   12094 use Algorithm::NaiveBayes::Util;
  1         865  
  1         68  
12 1     1   7 use List::Util qw(min max sum);
  1         3  
  1         69  
13 1     1   6 no warnings 'redefine';
  1         2  
  1         501  
14             *Algorithm::NaiveBayes::Util::rescale = sub {
15 0     0   0 my ($scores) = @_;
16 0         0 my $min = min( values %$scores );
17 0         0 my $sum = sum( values %$scores );
18 0         0 $sum -= $min * ( keys %$scores );
19 0         0 for ( sort { $scores->{$b} <=> $scores->{$a} } keys %$scores ) {
  0         0  
20 0         0 $scores->{$_} = ( $scores->{$_} - $min ) / $sum;
21             }
22 0         0 my $max = max( values %$scores );
23 0         0 for ( sort { $scores->{$b} <=> $scores->{$a} } keys %$scores ) {
  0         0  
24 0         0 $scores->{$_} = sprintf( "%0.2f", $scores->{$_} / $max );
25             }
26 0         0 return $scores;
27 1     1   39 };
28             }
29              
30             sub new {
31 1     1 1 14 my $class = shift;
32 1         14 my $self = $class->SUPER::new(@_);
33 1         10 $self->brain( Algorithm::NaiveBayes->new( purge => 0 ) );
34             #use Devel::Size qw(size total_size);
35             #use Devel::Peek;
36             {
37 1     1   18 no warnings 'redefine';
  1         3  
  1         692  
  1         1913  
38             *Algorithm::NaiveBayes::Model::Frequency::do_predict = sub {
39 0     0   0 my ( $self, $m, $newattrs ) = @_;
40             # print "IN :", total_size($m), "\n";
41 0         0 my %scores = %{ $m->{prior_probs} };
  0         0  
42 0         0 while ( my ( $feature, $value ) = each %$newattrs ) {
43 0 0       0 unless ( exists $m->{attributes}{$feature} ) {
44 0         0 push( @{ $self->{no_match_features} }, $feature );
  0         0  
45 0         0 next;
46             }
47             else {
48 0         0 push( @{ $self->{match_features} }, $feature );
  0         0  
49             }
50 0         0 while ( my ( $label, $attributes ) = each %{ $m->{probs} } ) {
  0         0  
51 0   0     0 my $p = ($attributes->{$feature} || $m->{smoother}{$label});
52 0         0 $scores{$label} += $p * $value;
53              
54             #$scores{$label} +=
55             # ( $attributes->{$feature} || $m->{smoother}->{$label} ) *
56             # $value;
57             }
58             }
59             # print "OUT:", total_size($m), "\n";
60 0         0 Algorithm::NaiveBayes::Util::rescale( \%scores );
61              
62 0         0 return \%scores;
63 1         23 };
64             }
65 1         5 return $self;
66             }
67              
68             sub categorize {
69 0     0 1   my $self = shift;
70 0           my $word_set = shift;
71 0           my $user_extention = shift;
72 0           $self->brain->{no_match_features} = [];
73 0           $self->brain->{match_features} = [];
74 0           my $score = $self->brain->predict( attributes => $word_set );
75 0           my $no_matches = $self->brain->{no_match_features};
76 0           my $matches = $self->brain->{match_features};
77 0           my $result = Lingua::JA::Categorize::Result->new(
78             context => $self->context,
79             score => $score,
80             matches => $matches,
81             no_matches => $no_matches,
82             word_set => $word_set,
83             user_extention => $user_extention,
84             );
85 0           return $result;
86             }
87              
88             sub save {
89 0     0 1   my $self = shift;
90 0           my $save_file = shift;
91 0           $self->brain->save_state($save_file);
92             }
93              
94             sub load {
95 0     0 1   my $self = shift;
96 0           my $save_file = shift;
97 0           my $brain = $self->brain;
98 0           $brain = Algorithm::NaiveBayes->restore_state($save_file);
99 0           $self->brain($brain);
100             }
101              
102             1;
103             __END__