File Coverage

blib/lib/Hailo/Engine/Scored.pm

Criterion	Covered	Total	%
statement	14	129	10.8
branch	0	52	0.0
condition	0	14	0.0
subroutine	5	16	31.2
pod	0	1	0.0
total	19	212	8.9

line	stmt	bran	cond	sub	pod	time	code
1							package Hailo::Engine::Scored;
2							our $AUTHORITY = 'cpan:AVAR';
3							$Hailo::Engine::Scored::VERSION = '0.75';
4	1			1		1652	use v5.10.0;
	1					4
5	1			1		5	use Moose;
	1					2
	1					7
6	1			1		6999	use List::Util qw<sum>;
	1					2
	1					72
7	1			1		6	use List::MoreUtils qw<any>;
	1					2
	1					9
8	1			1		708	use Time::HiRes qw<gettimeofday tv_interval>;
	1					3
	1					10
9
10							extends 'Hailo::Engine::Default';
11
12							after BUILD => sub {
13							my ($self) = @_;
14							my %args = $self->arguments;
15
16							if (defined $args{iterations} && defined $args{interval}) {
17							die __PACKAGE__.": You can only specify one of 'iterations' and 'interval'\n";
18							}
19							return;
20							};
21
22							sub reply {
23	0			0	0		my $self = shift;
24	0		0				my $tokens = shift // [];
25
26							# see if we recognize any of the input tokens
27	0						my $token_cache = $self->_resolve_input_tokens($tokens);
28	0						my @input_token_ids = keys %$token_cache;
29	0						my @token_counts;
30
31							# let's select potential pivot tokens from the input
32	0	0					if (keys %$token_cache) {
33							# we only want the ones with normal spacing (usually normal words)
34							@token_counts = map {
35	0	0					$token_cache->{$_}[0] == 0 ? [$_, $token_cache->{$_}[2]] : ()
	0
36							} keys %$token_cache;
37							}
38
39	0						my $token_probs = $self->_get_pivot_probabilites(\@token_counts);
40	0						my @started = gettimeofday();
41	0						my $iterations = 0;
42
43	0						my $done;
44	0						my %args = $self->arguments;
45	0	0	0				if (!defined $args{iterations} && !defined $args{interval}) {
46							# construct replies for half a second by default
47	0						$args{interval} = 0.5;
48							}
49
50	0	0					if (defined $args{iterations}) {
51							$done = sub {
52	0	0		0			return 1 if $iterations == $args{iterations};
53	0						};
54							}
55							else {
56							$done = sub {
57	0			0			my $elapsed = tv_interval(\@started, [gettimeofday]);
58	0	0					return 1 if $elapsed >= $args{interval};
59	0						};
60							}
61
62	0						my (%link_cache, %expr_cache, $best_score, $best_reply);
63	0						while (1) {
64	0						$iterations++;
65	0						my $reply = $self->_generate_reply($token_probs, \%expr_cache);
66	0	0					return if !defined $reply; # we don't know any expressions yet
67
68	0						my $score = $self->_evaluate_reply(\@input_token_ids, $reply, \%link_cache);
69
70	0	0	0				if (defined $best_reply && $self->_too_similar(\@input_token_ids, $reply)) {
71	0	0					last if $done->();
72	0						next;
73							}
74
75	0	0	0				if (!defined $best_score \|\| $score > $best_score) {
76	0						$best_score = $score;
77	0						$best_reply = $reply;
78							}
79
80	0	0					last if $done->();
81							}
82
83							# translate token ids to token spacing/text
84							my @output = map {
85	0		0				$token_cache->{$_} // ($token_cache->{$_} = $self->_token_info($_))
	0
86							} @$best_reply;
87	0						return \@output;
88							}
89
90							# Calculate the probability we wish to pick each token as the pivot.
91							# This uses -log2(p) as a method for inverting token probability,
92							# ensuring that our rarer tokens are picked more often.
93							sub _get_pivot_probabilites {
94	0			0			my ($self, $token_counts) = @_;
95
96	0	0					return [] if !@$token_counts;
97	0	0					return [[$token_counts->[0], 1]] if @$token_counts == 1;
98
99							# calculate the (non-normalized) probability we want each to occur
100	0						my $count_sum = sum(map { $_->[1] } @$token_counts);
	0
101	0						my $p = [];
102	0						my $p_sum = 0;
103	0						for my $token_count (map { $_->[1] } @$token_counts) {
	0
104	0						my $token_p = -log(($token_count/$count_sum))/log(2);
105	0						push @$p, $token_p;
106	0						$p_sum += $token_p;
107							}
108
109							# normalize the probabilities
110							my @probs = map {
111	0						[$token_counts->[$_], $p->[$_] / $p_sum];
112	0						} 0..$#{ $token_counts };
	0
113
114	0						return \@probs;
115							}
116
117							sub _generate_reply {
118	0			0			my ($self, $token_probs, $expr_cache) = @_;
119
120	0						my ($pivot_expr_id, @token_ids) = @_;
121	0	0					if (@$token_probs) {
122	0						my $pivot_token_id = $self->_choose_pivot($token_probs);
123	0						($pivot_expr_id, @token_ids) = $self->_random_expr($pivot_token_id);
124							}
125							else {
126	0						($pivot_expr_id, @token_ids) = $self->_random_expr();
127	0	0					return if !defined $pivot_expr_id; # no expressions in the database
128							}
129
130							# construct the end of the reply
131	0						$self->_construct_reply('next', $pivot_expr_id, \@token_ids, $expr_cache);
132
133							# construct the beginning of the reply
134	0						$self->_construct_reply('prev', $pivot_expr_id, \@token_ids, $expr_cache);
135
136	0						return \@token_ids;
137							}
138
139							sub _evaluate_reply {
140	0			0			my ($self, $input_token_ids, $reply_token_ids, $cache) = @_;
141	0						my $order = $self->order;
142	0						my $score = 0;
143
144	0						for my $idx (0 .. $#{ $reply_token_ids } - $order) {
	0
145	0						my $next_token_id = $reply_token_ids->[$idx];
146
147	0	0		0			if (any { $_ == $next_token_id } @$input_token_ids) {
	0
148	0						my @expr = @$reply_token_ids[$idx .. $idx+$order-1];
149	0						my $key = join('_', @expr)."-$next_token_id";
150
151	0	0					if (!defined $cache->{$key}) {
152	0						$cache->{$key} = $self->_expr_token_probability('next', \@expr, $next_token_id);
153							}
154	0	0					if ($cache->{$key} > 0) {
155	0						$score -= log($cache->{$key})/log(2);
156							}
157							}
158							}
159
160	0						for my $idx (0 .. $#{ $reply_token_ids } - $order) {
	0
161	0						my $prev_token_id = $reply_token_ids->[$idx];
162
163	0	0		0			if (any { $_ == $prev_token_id } @$input_token_ids) {
	0
164	0						my @expr = @$reply_token_ids[$idx+1 .. $idx+$order];
165	0						my $key = "$prev_token_id-".join('_', @expr);
166
167	0	0					if (!defined $cache->{$key}) {
168	0						$cache->{$key} = $self->_expr_token_probability('prev', \@expr, $prev_token_id);
169							}
170	0	0					if ($cache->{$key} > 0) {
171	0						$score -= log($cache->{$key})/log(2);
172							}
173							}
174							}
175
176							# Prefer shorter replies. This behavior is present but not
177							# documented in recent MegaHAL.
178	0						my $score_divider = 1;
179	0	0					if (@$reply_token_ids >= 8) {
		0
180	0						$score /= sqrt(@$reply_token_ids - 1);
181							}
182							elsif (@$reply_token_ids >= 16) {
183	0						$score /= @$reply_token_ids;
184							}
185
186	0						return $score;
187							}
188
189							sub _expr_token_probability {
190	0			0			my ($self, $pos, $expr, $token_id) = @_;
191	0						my $order = $self->order;
192
193	0						my $expr_id = $self->_expr_id_add($expr);
194
195	0						$self->{"_sth_${pos}_token_count"}->execute($expr_id, $token_id);
196	0						my $expr2token = $self->{"_sth_${pos}_token_count"}->fetchrow_array();
197	0	0					return 0 if !$expr2token;
198
199	0						$self->{"_sth_${pos}_token_links"}->execute($expr_id);
200	0						my $expr2all = $self->{"_sth_${pos}_token_links"}->fetchrow_array();
201	0						return $expr2token / $expr2all;
202							}
203
204							sub _choose_pivot {
205	0			0			my ($self, $token_probs) = @_;
206
207	0						my $random = rand;
208	0						my $p = 0;
209	0						for my $token (@$token_probs) {
210	0						$p += $token->[1];
211	0	0					return $token->[0][0] if $p > $random;
212							}
213
214	0						return;
215							}
216
217							sub _too_similar {
218	0			0			my ($self, $input_token_ids, $reply_token_ids) = @_;
219
220	0						my %input_token_ids = map { +$_ => 1 } @$input_token_ids;
	0
221
222	0						for my $reply_token_id (@$reply_token_ids) {
223	0	0					return if !$input_token_ids{$reply_token_id};
224							}
225	0						return 1;
226							}
227
228							__PACKAGE__->meta->make_immutable;
229
230							=encoding utf8
231
232							=head1 NAME
233
234							Hailo::Engine::Scored - MegaHAL-style reply scoring for L<Hailo\|Hailo>
235
236							=head1 DESCRIPTION
237
238							This backend implements the logic of replying to and learning from
239							input using the resources given to the L<engine
240							roles\|Hailo::Role::Engine>. It is inherits from
241							L<Hailo::Engine::Default\|Hailo::Engine::Default> and only overrides its
242							C<reply> method.
243
244							It generates multiple replies and applies a scoring algorithm to them, then
245							returns the best one, similar to MegaHAL.
246
247							=head1 ATTRIBUTES
248
249							=head2 C<engine_args>
250
251							This is a hash reference which can have the following keys:
252
253							=head3 C<iterations>
254
255							The number of replies to generate before returning the best one.
256
257							=head3 C<interval>
258
259							The time (in seconds) to spend on generating replies before returning the
260							best one.
261
262							You can not specify both C<iterations> and C<interval> at the same time. If
263							neither is specified, a default C<interval> of 0.5 seconds will be used.
264
265							=head1 AUTHORS
266
267							Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com
268
269							This module was based on code from Peter Teichman's Cobe project.
270
271							=head1 LICENSE AND COPYRIGHT
272
273							Copyright 2010 Hinrik E<Ouml>rn SigurE<eth>sson and
274							E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>
275
276							This program is free software, you can redistribute it and/or modify
277							it under the same terms as Perl itself.
278
279							=cut