File Coverage

blib/lib/Hailo.pm
Criterion Covered Total %
statement 136 138 98.5
branch 42 50 84.0
condition 12 12 100.0
subroutine 31 31 100.0
pod 6 8 75.0
total 227 239 94.9


line stmt bran cond sub pod time code
1             package Hailo;
2             our $AUTHORITY = 'cpan:AVAR';
3             $Hailo::VERSION = '0.75';
4 42     42   11404105 use v5.10.0;
  42         435  
5 42     42   11454 use autodie qw(open close);
  42         347791  
  42         266  
6 42     42   26350 use Moose;
  42         6232751  
  42         332  
7 42     42   304918 use MooseX::StrictConstructor;
  42         805957  
  42         209  
8 42     42   341503 use MooseX::Types::Moose ':all';
  42         1038807  
  42         457  
9 42     42   338353 use File::Glob ':glob';
  42         90  
  42         10476  
10 42     42   309 use Class::Load qw(try_load_class);
  42         84  
  42         2154  
11 42     42   253 use Scalar::Util qw(blessed);
  42         187  
  42         2066  
12 42     42   282 use List::Util qw(first);
  42         80  
  42         2787  
13 42     42   269 use namespace::clean -except => 'meta';
  42         85  
  42         266  
14              
15 42         22874 use constant PLUGINS => [ qw[
16             Hailo::Engine::Default
17             Hailo::Engine::Scored
18             Hailo::Storage::MySQL
19             Hailo::Storage::PostgreSQL
20             Hailo::Storage::SQLite
21             Hailo::Tokenizer::Chars
22             Hailo::Tokenizer::Words
23             Hailo::UI::ReadLine
24 42     42   41817 ] ];
  42         83  
25              
26             has brain => (
27             isa => Str,
28             is => 'rw',
29             );
30              
31             has order => (
32             isa => Int,
33             is => 'rw',
34             default => 2,
35             trigger => sub {
36             my ($self, $order) = @_;
37             $self->_custom_order(1);
38             },
39             );
40              
41             has _custom_order => (
42             isa => Bool,
43             is => 'rw',
44             default => 0,
45             init_arg => undef,
46             documentation => "Here so we can differentiate between the default value of order being explictly set and being set by default",
47             );
48              
49             has _custom_tokenizer_class => (
50             isa => Bool,
51             is => 'rw',
52             default => 0,
53             init_arg => undef,
54             documentation => "Here so we can differentiate between the default value of tokenizer_class being explictly set and being set by default",
55             );
56              
57             has save_on_exit => (
58             isa => Bool,
59             is => 'rw',
60             default => 1,
61             );
62              
63             has brain_resource => (
64             documentation => "Alias for `brain' for backwards compatibility",
65             isa => Str,
66             is => 'rw',
67             trigger => sub {
68             my ($self, $brain) = @_;
69             $self->brain($brain);
70             },
71             );
72              
73             sub BUILD {
74 279     279 0 6881 my ($self) = @_;
75 279         6091 my $brain = $self->brain;
76 279 100       1373 return if !defined $brain;
77 240         12045 $self->brain(bsd_glob($brain));
78 240         4865 return;
79             }
80              
81             my %has = (
82             engine => {
83             name => 'Engine',
84             default => 'Default',
85             },
86             storage => {
87             name => 'Storage',
88             default => 'SQLite',
89             },
90             tokenizer => {
91             name => 'Tokenizer',
92             default => 'Words',
93             },
94             ui => {
95             name => 'UI',
96             default => 'ReadLine',
97             },
98             );
99              
100             for my $k (keys %has) {
101             my $name = $has{$k}->{name};
102             my $default = $has{$k}->{default};
103             my $method_class = "${k}_class";
104             my $method_args = "${k}_args";
105              
106             # working classes
107             has "${k}_class" => (
108             isa => Str,
109             is => "rw",
110             default => $default,
111             ($k eq 'tokenizer'
112             ? (trigger => sub {
113             my ($self, $class) = @_;
114             $self->_custom_tokenizer_class(1);
115             })
116             : ())
117             );
118              
119             # Object arguments
120             has "${k}_args" => (
121             documentation => "Arguments for the $name class",
122             isa => HashRef,
123             is => "ro",
124             default => sub { +{} },
125             );
126              
127             # Working objects
128             has "_${k}" => (
129             does => "Hailo::Role::$name",
130             lazy_build => 1,
131             is => 'ro',
132             init_arg => undef,
133             );
134              
135             # Generate the object itself
136 42     42   329 no strict 'refs';
  42         81  
  42         22453  
137             *{"_build__${k}"} = sub {
138 354     354   920 my ($self) = @_;
139              
140             my $obj = $self->_new_class(
141             $name,
142             $self->$method_class,
143             {
144             arguments => $self->$method_args,
145             ($k =~ /^(?:engine|storage)$/s
146             ? (order => $self->order)
147             : ()),
148             ($k eq 'engine'
149             ? (storage => $self->_storage)
150             : ()),
151             (($k eq 'storage' and defined $self->brain)
152             ? (
153 354 100 100     8668 hailo => do {
    100          
    100          
    100          
154 239         1354 require Scalar::Util;
155 239         1433 Scalar::Util::weaken(my $s = $self);
156              
157             my %callback = (
158 200     200   4530 has_custom_order => sub { $s->_custom_order },
159 3     3   70 has_custom_tokenizer_class => sub { $s->_custom_tokenizer_class },
160             set_order => sub {
161 1     1   3 my ($db_order) = @_;
162 1         23 $s->order($db_order);
163 1         21 $s->_engine->order($db_order);
164             },
165             set_tokenizer_class => sub {
166 1     1   3 my ($db_tokenizer_class) = @_;
167 1         23 $s->tokenizer_class($db_tokenizer_class);
168             },
169 239         2833 );
170              
171 239         5526 \%callback;
172             },
173             brain => $self->brain
174             )
175             : ()),
176             ($k eq 'storage'
177             ? (tokenizer_class => $self->tokenizer_class)
178             : ())
179             },
180             );
181              
182 351         9568 return $obj;
183             };
184             }
185              
186             sub _new_class {
187 354     354   1072 my ($self, $type, $class, $args) = @_;
188              
189 354         532 my $pkg;
190 354 50       904 if ($class =~ m[^\+(?<custom_plugin>.+)$]) {
191 39     39   20683 $pkg = $+{custom_plugin};
  39         15568  
  39         44825  
  0         0  
192             } else {
193 354         611 my @plugins = @{ $self->PLUGINS };
  354         1904  
194             # Be fuzzy about includes, e.g. DBD::SQLite or SQLite or sqlite will go
195 1836     1836   8374 $pkg = first { / $type : .* : $class /ix }
196 354         2490 sort { length $a <=> length $b }
  6018         8915  
197             @plugins;
198              
199 354 100       1820 unless ($pkg) {
200 3         7 local $" = ', ';
201 3         6 my @p = grep { /$type/ } @plugins;
  24         80  
202 3         68 die "Couldn't find a class name matching '$class' in plugins '@p'";
203             }
204             }
205              
206 351         1461 my ($success, $error) = try_load_class($pkg);
207 351 50       39331 die $error if !$success;
208              
209 351         11731 return $pkg->new(%$args);
210             }
211              
212             sub save {
213 259     259 1 592 my ($self, @args) = @_;
214 259         5384 $self->_storage->save(@args);
215 259         511 return;
216             }
217              
218             sub train {
219 23     23 1 2212 my ($self, $input, $fast) = @_;
220              
221 23         577 $self->_storage->start_training();
222              
223 23 100 100     287 if (not ref $input and defined $input and $input eq '-') {
    100 100        
    100          
    100          
224             # With STDIN
225 1 50       6 die "You must provide STDIN when training from '-'" if $self->_is_interactive(*STDIN);
226 0         0 $self->_train_fh(*STDIN, $fast);
227             } elsif (ref $input eq 'GLOB') {
228             # With a filehandle
229 1         7 $self->_train_fh($input, $fast);
230             } elsif (not ref $input) {
231             # With a file
232 13         72 open my $fh, '<:encoding(utf8)', $input;
233 11         29650 $self->_train_fh($fh, $fast, $input);
234             } elsif (ref $input eq 'ARRAY') {
235             # With an Array
236 7         25 for my $line (@$input) {
237 936         2320 $self->_learn_one($line, $fast);
238 936 50       24732 $self->_engine->flush_cache if !$fast;
239             }
240 7 50       30 $self->_engine->flush_cache if $fast;
241             } else {
242             # With something naughty
243 1         36 die "Unknown input: $input";
244             }
245              
246 19         535 $self->_storage->stop_training();
247              
248 19         174 return;
249             }
250              
251             sub _train_fh {
252 12     12   300 my ($self, $fh, $fast) = @_;
253              
254 12         313 while (my $line = <$fh>) {
255 990         2194 chomp $line;
256 990         2302 $self->_learn_one($line, $fast);
257 990 50       24337 $self->_engine->flush_cache if !$fast;
258             }
259 12 50       58 $self->_engine->flush_cache if $fast;
260              
261 12         342 return;
262             }
263              
264             sub learn {
265 1639     1639 1 869510 my ($self, $input) = @_;
266 1639         2812 my $inputs;
267              
268 1639 100       5295 if (not defined $input) {
    100          
    100          
269 2         55 die "Cannot learn from undef input";
270             } elsif (not ref $input) {
271 1618         3567 $inputs = [$input];
272             } elsif (ref $input eq 'ARRAY') {
273 17         35 $inputs = $input; # With an Array
274             } else {
275 2         84 die "Unknown input: $input";
276             }
277              
278 1635         47171 my $storage = $self->_storage;
279              
280 1634         5764 $storage->start_learning();
281 1634         5075 $self->_learn_one($_) for @$inputs;
282 1633         5556 $storage->stop_learning();
283 1633         5741 return;
284             }
285              
286             sub _learn_one {
287 3561     3561   7403 my ($self, $input, $fast) = @_;
288 3561         74945 my $engine = $self->_engine;
289              
290 3561         72954 my $tokens = $self->_tokenizer->make_tokens($input);
291 3560 50       14290 $fast ? $engine->learn_cached($tokens) : $engine->learn($tokens);
292              
293 3560         19445 return;
294             }
295              
296             sub learn_reply {
297 5     5 1 23 my ($self, $input) = @_;
298 5         20 $self->learn($input);
299 3         16 return $self->reply($input);
300             }
301              
302             sub reply {
303 1417     1417 1 305508 my ($self, $input) = @_;
304              
305 1417         42451 my $storage = $self->_storage;
306             # start_training() hasn't been called so we can't guarentee that
307             # the storage has been engaged at this point. This must be called
308             # before ->_engine() is called anywhere to ensure that the
309             # lazy-loading in the engine works.
310 1417 100       35445 $storage->_engage() unless $storage->_engaged;
311              
312 1216         25831 my $engine = $self->_engine;
313 1216         25084 my $tokenizer = $self->_tokenizer;
314              
315 1216         1805 my $reply;
316 1216 100       2588 if (defined $input) {
317 1207         4099 my $tokens = $tokenizer->make_tokens($input);
318 1207         3835 $reply = $engine->reply($tokens);
319             }
320             else {
321 9         49 $reply = $engine->reply();
322             }
323              
324 1216 100       3166 return unless defined $reply;
325 1213         4477 return $tokenizer->make_output($reply);
326             }
327              
328             sub stats {
329 29     29 1 335 my ($self) = @_;
330              
331 29         671 return $self->_storage->totals();
332             }
333              
334             sub DEMOLISH {
335 281     281 0 879 my ($self) = @_;
336 281 100 100     6811 $self->save() if blessed $self->{_storage} and $self->save_on_exit;
337 281         678 return;
338             }
339              
340             sub _is_interactive {
341 23     23   2780 require IO::Interactive;
342 23         28438 return IO::Interactive::is_interactive();
343             }
344              
345             __PACKAGE__->meta->make_immutable;
346              
347             =encoding utf8
348              
349             =head1 NAME
350              
351             Hailo - A pluggable Markov engine analogous to MegaHAL
352              
353             =head1 SYNOPSIS
354              
355             This is the synopsis for using Hailo as a module. See L<hailo> for
356             command-line invocation.
357              
358             # Hailo requires Perl 5.10
359             use v5.10.0;
360             use Moose;
361             use Hailo;
362              
363             # Construct a new in-memory Hailo using the SQLite backend. See
364             # backend documentation for other options.
365             my $hailo = Hailo->new;
366              
367             # Various ways to learn
368             my @train_this = ("I like big butts", "and I can not lie");
369             $hailo->learn(\@train_this);
370             $hailo->learn($_) for @train_this;
371              
372             # Heavy-duty training interface. Backends may drop some safety
373             # features like journals or synchronous IO to train faster using
374             # this mode.
375             $hailo->train("megahal.trn");
376             $hailo->train($filehandle);
377              
378             # Make the brain babble
379             say $hailo->reply("hello good sir.");
380             # Just say something at random
381             say $hailo->reply();
382              
383             =head1 DESCRIPTION
384              
385             Hailo is a fast and lightweight markov engine intended to replace
386             L<AI::MegaHAL|AI::MegaHAL>. It has a pluggable
387             L<storage|Hailo::Role::Storage>, L<tokenizer|Hailo::Role::Tokenizer>
388             and L<engine|Hailo::Role::Engine> backends.
389              
390             It is similar to MegaHAL in functionality, the main differences (with the
391             default backends) being better scalability, drastically less memory usage,
392             an improved tokenizer, and tidier output.
393              
394             With this distribution, you can create, modify, and query Hailo brains. To
395             use Hailo in event-driven POE applications, you can use the
396             L<POE::Component::Hailo|POE::Component::Hailo> wrapper. One example is
397             L<POE::Component::IRC::Plugin::Hailo|POE::Component::IRC::Plugin::Hailo>,
398             which implements an IRC chat bot.
399              
400             =head2 Etymology
401              
402             I<Hailo> is a portmanteau of I<HAL> (as in MegaHAL) and
403             L<failo|http://identi.ca/failo>.
404              
405             =head1 Backends
406              
407             Hailo supports pluggable L<storage|Hailo::Role::Storage> and
408             L<tokenizer|Hailo::Role::Tokenizer> backends, it also supports a
409             pluggable L<UI|Hailo::Role::UI> backend which is used by the L<hailo>
410             command-line utility.
411              
412             =head2 Storage
413              
414             Hailo can currently store its data in either a
415             L<SQLite|Hailo::Storage::SQLite>,
416             L<PostgreSQL|Hailo::Storage::PostgreSQL> or
417             L<MySQL|Hailo::Storage::MySQL> database. Some NoSQL backends were
418             supported in earlier versions, but they were removed as they had no
419             redeeming quality.
420              
421             SQLite is the primary target for Hailo. It's much faster and uses less
422             resources than the other two. It's highly recommended that you use it.
423              
424             See L<Hailo::Storage/"Comparison of backends"> for benchmarks showing
425             how the various backends compare under different workloads, and how
426             you can create your own.
427              
428             =head2 Tokenizer
429              
430             By default Hailo will use L<the word
431             tokenizer|Hailo::Tokenizer::Words> to split up input by whitespace,
432             taking into account things like quotes, sentence terminators and more.
433              
434             There's also a L<the character
435             tokenizer|Hailo::Tokenizer::Chars>. It's not generally useful for a
436             conversation bot but can be used to e.g. generate new words given a
437             list of existing words.
438              
439             =head1 UPGRADING
440              
441             Hailo makes no promises about brains generated with earlier versions
442             being compatable with future version and due to the way Hailo works
443             there's no practical way to make that promise. Learning in Hailo is
444             lossy so an accurate conversion is impossible.
445              
446             If you're maintaining a Hailo brain that you want to keep using you
447             should save the input you trained it on and re-train when you upgrade.
448              
449             Hailo is always going to lose information present in the input you
450             give it. How input tokens get split up and saved to the storage
451             backend depends on the version of the tokenizer being used and how
452             that input gets saved to the database.
453              
454             For instance if an earlier version of Hailo tokenized C<"foo+bar">
455             simply as C<"foo+bar"> but a later version split that up into
456             C<"foo", "+", "bar">, then an input of C<"foo+bar are my favorite
457             metasyntactic variables"> wouldn't take into account the existing
458             C<"foo+bar"> string in the database.
459              
460             Tokenizer changes like this would cause the brains to accumulate
461             garbage and would leave other parts in a state they wouldn't otherwise
462             have gotten into.
463              
464             There have been more drastic changes to the database format itself in
465             the past.
466              
467             Having said all that the database format and the tokenizer are
468             relatively stable. At the time of writing 0.33 is the latest release
469             and it's compatable with brains down to at least 0.17. If you're
470             upgrading and there isn't a big notice about the storage format being
471             incompatable in the F<Changes> file your old brains will probably work
472             just fine.
473              
474             =head1 ATTRIBUTES
475              
476             =head2 C<brain>
477              
478             The name of the brain (file name, database name) to use as storage.
479             There is no default. Whether this gets used at all depends on the
480             storage backend, currently only SQLite uses it.
481              
482             =head2 C<save_on_exit>
483              
484             A boolean value indicating whether Hailo should save its state before
485             its object gets destroyed. This defaults to true and will simply call
486             L<save|/save> at C<DEMOLISH> time.
487              
488             See L<Hailo::Storage::SQLite/"in_memory"> for how the SQLite backend
489             uses this option.
490              
491             =head2 C<order>
492              
493             The Markov order (chain length) you want to use for an empty brain.
494             The default is 2.
495              
496             =head2 C<engine_class>
497              
498             =head2 C<storage_class>
499              
500             =head2 C<tokenizer_class>
501              
502             =head2 C<ui_class>
503              
504             A a short name name of the class we use for the engine, storage,
505             tokenizer or ui backends.
506              
507             By default this is B<Default> for the engine, B<SQLite> for storage,
508             B<Words> for the tokenizer and B<ReadLine> for the UI. The UI backend
509             is only used by the L<hailo> command-line interface.
510              
511             You can only specify the short name of one of the packages Hailo
512             itself ships with. If you need another class then just prefix the
513             package with a plus (Catalyst style), e.g. C<+My::Foreign::Tokenizer>.
514              
515             =head2 C<engine_args>
516              
517             =head2 C<storage_args>
518              
519             =head2 C<tokenizer_args>
520              
521             =head2 C<ui_args>
522              
523             A C<HashRef> of arguments for engine/storage/tokenizer/ui
524             backends. See the documentation for the backends for what sort of
525             arguments they accept.
526              
527             =head1 METHODS
528              
529             =head2 C<new>
530              
531             This is the constructor. It accepts the attributes specified in
532             L</ATTRIBUTES>.
533              
534             =head2 C<learn>
535              
536             Takes a string or an array reference of strings and learns from them.
537              
538             =head2 C<train>
539              
540             Takes a filename, filehandle or array reference and learns from all its
541             lines. If a filename is passed, the file is assumed to be UTF-8 encoded.
542             Unlike L<C<learn>|/learn>, this method sacrifices some safety (disables
543             the database journal, fsyncs, etc) for speed while learning.
544              
545             You can prove a second parameter which, if true, will use aggressive
546             caching while training, which will speed things up considerably for large
547             inputs, but will take up quite a bit of memory.
548              
549             =head2 C<reply>
550              
551             Takes an optional line of text and generates a reply that might be relevant.
552              
553             =head2 C<learn_reply>
554              
555             Takes a string argument, learns from it, and generates a reply that
556             might be relevant. This is equivalent to calling L<learn|/learn>
557             followed by L<reply|/reply>.
558              
559             =head2 C<save>
560              
561             Tells the underlying storage backend to L<save its
562             state|Hailo::Role::Storage/"save">, any arguments to this method will
563             be passed as-is to the backend.
564              
565             =head2 C<stats>
566              
567             Takes no arguments. Returns the number of tokens, expressions, previous
568             token links and next token links.
569              
570             =head1 SUPPORT
571              
572             You can join the IRC channel I<#hailo> on FreeNode if you have questions.
573              
574             =head1 BUGS
575              
576             Bugs, feature requests and other issues are tracked in L<Hailo's RT on
577             rt.cpan.org|https://rt.cpan.org/Dist/Display.html?Name=Hailo>
578              
579             =head1 SEE ALSO
580              
581             =over
582              
583             =item * L<POE::Component::Hailo> - A non-blocking POE wrapper around Hailo
584              
585             =item * L<POE::Component::IRC::Plugin::Hailo> - A Hailo IRC bot plugin
586              
587             =item * L<http://github.com/hinrik/failo> - Failo, an IRC bot that uses Hailo
588              
589             =item * L<http://github.com/bingos/gumbybrain> - GumbyBRAIN, a more famous IRC bot that uses Hailo
590              
591             =item * L<Hailo::UI::Web> - A L<Catalyst> and jQuery powered web
592             interface to Hailo available at L<hailo.nix.is|http://hailo.nix.is>
593             and as L<hailo-ui-web|http://github.com/avar/hailo-ui-web> on
594             L<GitHub|http://github.com>
595              
596             =item * L<tweetmix|http://www.tweetmix.me/>, a random tweet generator powered by Hailo
597              
598             =item * L<http://github.com/pteichman/cobe> - cobe, a Python port of MegaHAL "inspired by the success of Hailo"
599              
600             =back
601              
602             =head1 LINKS
603              
604             =over
605              
606             =item * L<hailo.org|http://hailo.org> - Hailo's website
607              
608             =item * L<http://bit.ly/hailo_rewrite_of_megahal> - Hailo: A Perl rewrite of
609             MegaHAL, A blog posting about the motivation behind Hailo
610              
611             =item * L<http://blogs.perl.org/users/aevar_arnfjor_bjarmason/hailo/> -
612             More blog posts about Hailo on E<AElig>var ArnfjE<ouml>rE<eth>
613             Bjarmason's L<blogs.perl.org|http://blogs.perl.org> blog
614              
615             =item * Hailo on L<freshmeat|http://freshmeat.net/projects/hailo> and
616             L<ohloh|https://www.ohloh.net/p/hailo>
617              
618             =back
619              
620             =head1 AUTHORS
621              
622             Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com
623              
624             E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>
625              
626             =head1 LICENSE AND COPYRIGHT
627              
628             Copyright 2010 Hinrik E<Ouml>rn SigurE<eth>sson and
629             E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>
630              
631             This program is free software, you can redistribute it and/or modify
632             it under the same terms as Perl itself.
633              
634             =cut