File Coverage

blib/lib/Hailo.pm
Criterion Covered Total %
statement 13 13 100.0
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 18 100.0


line stmt bran cond sub pod time code
1             package Hailo;
2             BEGIN {
3 14     14   343245 $Hailo::AUTHORITY = 'cpan:AVAR';
4             }
5             {
6             $Hailo::VERSION = '0.72';
7             }
8              
9 14     14   325 use 5.010;
  14         49  
  14         530  
10 14     14   17050 use autodie qw(open close);
  14         270781  
  14         94  
11 14     14   18194 use Any::Moose;
  14         497629  
  14         112  
12 14     14   9136 use Any::Moose 'X::StrictConstructor';
  14         136  
  14         72  
13             use File::Glob ':glob';
14             use Class::Load qw(try_load_class);
15             use Scalar::Util qw(blessed);
16             use List::Util qw(first);
17             use namespace::clean -except => 'meta';
18              
19             use constant PLUGINS => [ qw[
20             Hailo::Engine::Default
21             Hailo::Engine::Scored
22             Hailo::Storage::MySQL
23             Hailo::Storage::PostgreSQL
24             Hailo::Storage::SQLite
25             Hailo::Tokenizer::Chars
26             Hailo::Tokenizer::Words
27             Hailo::UI::ReadLine
28             ] ];
29              
30             has brain => (
31             isa => 'Str',
32             is => 'rw',
33             );
34              
35             has order => (
36             isa => 'Int',
37             is => 'rw',
38             default => 2,
39             trigger => sub {
40             my ($self, $order) = @_;
41             $self->_custom_order(1);
42             },
43             );
44              
45             has _custom_order => (
46             isa => 'Bool',
47             is => 'rw',
48             default => 0,
49             init_arg => undef,
50             documentation => "Here so we can differentiate between the default value of order being explictly set and being set by default",
51             );
52              
53             has _custom_tokenizer_class => (
54             isa => 'Bool',
55             is => 'rw',
56             default => 0,
57             init_arg => undef,
58             documentation => "Here so we can differentiate between the default value of tokenizer_class being explictly set and being set by default",
59             );
60              
61             has save_on_exit => (
62             isa => 'Bool',
63             is => 'rw',
64             default => 1,
65             );
66              
67             has brain_resource => (
68             documentation => "Alias for `brain' for backwards compatibility",
69             isa => 'Str',
70             is => 'rw',
71             trigger => sub {
72             my ($self, $brain) = @_;
73             $self->brain($brain);
74             },
75             );
76              
77             sub BUILD {
78             my ($self) = @_;
79             my $brain = $self->brain;
80             return if !defined $brain;
81             $self->brain(bsd_glob($brain));
82             return;
83             }
84              
85             my %has = (
86             engine => {
87             name => 'Engine',
88             default => 'Default',
89             },
90             storage => {
91             name => 'Storage',
92             default => 'SQLite',
93             },
94             tokenizer => {
95             name => 'Tokenizer',
96             default => 'Words',
97             },
98             ui => {
99             name => 'UI',
100             default => 'ReadLine',
101             },
102             );
103              
104             for my $k (keys %has) {
105             my $name = $has{$k}->{name};
106             my $default = $has{$k}->{default};
107             my $method_class = "${k}_class";
108             my $method_args = "${k}_args";
109              
110             # working classes
111             has "${k}_class" => (
112             isa => 'Str',
113             is => "rw",
114             default => $default,
115             ($k eq 'tokenizer'
116             ? (trigger => sub {
117             my ($self, $class) = @_;
118             $self->_custom_tokenizer_class(1);
119             })
120             : ())
121             );
122              
123             # Object arguments
124             has "${k}_args" => (
125             documentation => "Arguments for the $name class",
126             isa => 'HashRef',
127             is => "ro",
128             default => sub { +{} },
129             );
130              
131             # Working objects
132             has "_${k}" => (
133             does => "Hailo::Role::$name",
134             lazy_build => 1,
135             is => 'ro',
136             init_arg => undef,
137             );
138              
139             # Generate the object itself
140             no strict 'refs';
141             *{"_build__${k}"} = sub {
142             my ($self) = @_;
143              
144             my $obj = $self->_new_class(
145             $name,
146             $self->$method_class,
147             {
148             arguments => $self->$method_args,
149             ($k =~ /^(?:engine|storage)$/s
150             ? (order => $self->order)
151             : ()),
152             ($k eq 'engine'
153             ? (storage => $self->_storage)
154             : ()),
155             (($k eq 'storage' and defined $self->brain)
156             ? (
157             hailo => do {
158             require Scalar::Util;
159             Scalar::Util::weaken(my $s = $self);
160              
161             my %callback = (
162             has_custom_order => sub { $s->_custom_order },
163             has_custom_tokenizer_class => sub { $s->_custom_tokenizer_class },
164             set_order => sub {
165             my ($db_order) = @_;
166             $s->order($db_order);
167             $s->_engine->order($db_order);
168             },
169             set_tokenizer_class => sub {
170             my ($db_tokenizer_class) = @_;
171             $s->tokenizer_class($db_tokenizer_class);
172             },
173             );
174              
175             \%callback;
176             },
177             brain => $self->brain
178             )
179             : ()),
180             ($k eq 'storage'
181             ? (tokenizer_class => $self->tokenizer_class)
182             : ())
183             },
184             );
185              
186             return $obj;
187             };
188             }
189              
190             sub _new_class {
191             my ($self, $type, $class, $args) = @_;
192              
193             my $pkg;
194             if ($class =~ m[^\+(?.+)$]) {
195             $pkg = $+{custom_plugin};
196             } else {
197             my @plugins = @{ $self->PLUGINS };
198             # Be fuzzy about includes, e.g. DBD::SQLite or SQLite or sqlite will go
199             $pkg = first { / $type : .* : $class /ix }
200             sort { length $a <=> length $b }
201             @plugins;
202              
203             unless ($pkg) {
204             local $" = ', ';
205             my @p = grep { /$type/ } @plugins;
206             die "Couldn't find a class name matching '$class' in plugins '@p'";
207             }
208             }
209              
210             my ($success, $error) = try_load_class($pkg);
211             die $error if !$success;
212              
213             return $pkg->new(%$args);
214             }
215              
216             sub save {
217             my ($self, @args) = @_;
218             $self->_storage->save(@args);
219             return;
220             }
221              
222             sub train {
223             my ($self, $input, $fast) = @_;
224              
225             $self->_storage->start_training();
226              
227             if (not ref $input and defined $input and $input eq '-') {
228             # With STDIN
229             die "You must provide STDIN when training from '-'" if $self->_is_interactive(*STDIN);
230             $self->_train_fh(*STDIN, $fast);
231             } elsif (ref $input eq 'GLOB') {
232             # With a filehandle
233             $self->_train_fh($input, $fast);
234             } elsif (not ref $input) {
235             # With a file
236             open my $fh, '<:encoding(utf8)', $input;
237             $self->_train_fh($fh, $fast, $input);
238             } elsif (ref $input eq 'ARRAY') {
239             # With an Array
240             for my $line (@$input) {
241             $self->_learn_one($line, $fast);
242             $self->_engine->flush_cache if !$fast;
243             }
244             $self->_engine->flush_cache if $fast;
245             } else {
246             # With something naughty
247             die "Unknown input: $input";
248             }
249              
250             $self->_storage->stop_training();
251              
252             return;
253             }
254              
255             sub _train_fh {
256             my ($self, $fh, $fast) = @_;
257              
258             while (my $line = <$fh>) {
259             chomp $line;
260             $self->_learn_one($line, $fast);
261             $self->_engine->flush_cache if !$fast;
262             }
263             $self->_engine->flush_cache if $fast;
264              
265             return;
266             }
267              
268             sub learn {
269             my ($self, $input) = @_;
270             my $inputs;
271              
272             if (not defined $input) {
273             die "Cannot learn from undef input";
274             } elsif (not ref $input) {
275             $inputs = [$input];
276             } elsif (ref $input eq 'ARRAY') {
277             $inputs = $input; # With an Array
278             } else {
279             die "Unknown input: $input";
280             }
281              
282             my $storage = $self->_storage;
283              
284             $storage->start_learning();
285             $self->_learn_one($_) for @$inputs;
286             $storage->stop_learning();
287             return;
288             }
289              
290             sub _learn_one {
291             my ($self, $input, $fast) = @_;
292             my $engine = $self->_engine;
293              
294             my $tokens = $self->_tokenizer->make_tokens($input);
295             $fast ? $engine->learn_cached($tokens) : $engine->learn($tokens);
296              
297             return;
298             }
299              
300             sub learn_reply {
301             my ($self, $input) = @_;
302             $self->learn($input);
303             return $self->reply($input);
304             }
305              
306             sub reply {
307             my ($self, $input) = @_;
308              
309             my $storage = $self->_storage;
310             # start_training() hasn't been called so we can't guarentee that
311             # the storage has been engaged at this point. This must be called
312             # before ->_engine() is called anywhere to ensure that the
313             # lazy-loading in the engine works.
314             $storage->_engage() unless $storage->_engaged;
315              
316             my $engine = $self->_engine;
317             my $tokenizer = $self->_tokenizer;
318              
319             my $reply;
320             if (defined $input) {
321             my $tokens = $tokenizer->make_tokens($input);
322             $reply = $engine->reply($tokens);
323             }
324             else {
325             $reply = $engine->reply();
326             }
327              
328             return unless defined $reply;
329             return $tokenizer->make_output($reply);
330             }
331              
332             sub stats {
333             my ($self) = @_;
334              
335             return $self->_storage->totals();
336             }
337              
338             sub DEMOLISH {
339             my ($self) = @_;
340             $self->save() if blessed $self->{_storage} and $self->save_on_exit;
341             return;
342             }
343              
344             sub _is_interactive {
345             require IO::Interactive;
346             return IO::Interactive::is_interactive();
347             }
348              
349             __PACKAGE__->meta->make_immutable;
350              
351             =encoding utf8
352              
353             =head1 NAME
354              
355             Hailo - A pluggable Markov engine analogous to MegaHAL
356              
357             =head1 SYNOPSIS
358              
359             This is the synopsis for using Hailo as a module. See L for
360             command-line invocation.
361              
362             # Hailo requires Perl 5.10
363             use 5.010;
364             use Any::Moose;
365             use Hailo;
366              
367             # Construct a new in-memory Hailo using the SQLite backend. See
368             # backend documentation for other options.
369             my $hailo = Hailo->new;
370              
371             # Various ways to learn
372             my @train_this = ("I like big butts", "and I can not lie");
373             $hailo->learn(\@train_this);
374             $hailo->learn($_) for @train_this;
375              
376             # Heavy-duty training interface. Backends may drop some safety
377             # features like journals or synchronous IO to train faster using
378             # this mode.
379             $hailo->train("megahal.trn");
380             $hailo->train($filehandle);
381              
382             # Make the brain babble
383             say $hailo->reply("hello good sir.");
384             # Just say something at random
385             say $hailo->reply();
386              
387             =head1 DESCRIPTION
388              
389             Hailo is a fast and lightweight markov engine intended to replace
390             L. It has a L (or
391             L) based core with pluggable
392             L, L
393             and L backends.
394              
395             It is similar to MegaHAL in functionality, the main differences (with the
396             default backends) being better scalability, drastically less memory usage,
397             an improved tokenizer, and tidier output.
398              
399             With this distribution, you can create, modify, and query Hailo brains. To
400             use Hailo in event-driven POE applications, you can use the
401             L wrapper. One example is
402             L,
403             which implements an IRC chat bot.
404              
405             =head2 Etymology
406              
407             I is a portmanteau of I (as in MegaHAL) and
408             L.
409              
410             =head1 Backends
411              
412             Hailo supports pluggable L and
413             L backends, it also supports a
414             pluggable L backend which is used by the L
415             command-line utility.
416              
417             =head2 Storage
418              
419             Hailo can currently store its data in either a
420             L,
421             L or
422             L database. Some NoSQL backends were
423             supported in earlier versions, but they were removed as they had no
424             redeeming quality.
425              
426             SQLite is the primary target for Hailo. It's much faster and uses less
427             resources than the other two. It's highly recommended that you use it.
428              
429             See L for benchmarks showing
430             how the various backends compare under different workloads, and how
431             you can create your own.
432              
433             =head2 Tokenizer
434              
435             By default Hailo will use L
436             tokenizer|Hailo::Tokenizer::Words> to split up input by whitespace,
437             taking into account things like quotes, sentence terminators and more.
438              
439             There's also a L
440             tokenizer|Hailo::Tokenizer::Chars>. It's not generally useful for a
441             conversation bot but can be used to e.g. generate new words given a
442             list of existing words.
443              
444             =head1 UPGRADING
445              
446             Hailo makes no promises about brains generated with earlier versions
447             being compatable with future version and due to the way Hailo works
448             there's no practical way to make that promise. Learning in Hailo is
449             lossy so an accurate conversion is impossible.
450              
451             If you're maintaining a Hailo brain that you want to keep using you
452             should save the input you trained it on and re-train when you upgrade.
453              
454             Hailo is always going to lose information present in the input you
455             give it. How input tokens get split up and saved to the storage
456             backend depends on the version of the tokenizer being used and how
457             that input gets saved to the database.
458              
459             For instance if an earlier version of Hailo tokenized C<"foo+bar">
460             simply as C<"foo+bar"> but a later version split that up into
461             C<"foo", "+", "bar">, then an input of C<"foo+bar are my favorite
462             metasyntactic variables"> wouldn't take into account the existing
463             C<"foo+bar"> string in the database.
464              
465             Tokenizer changes like this would cause the brains to accumulate
466             garbage and would leave other parts in a state they wouldn't otherwise
467             have gotten into.
468              
469             There have been more drastic changes to the database format itself in
470             the past.
471              
472             Having said all that the database format and the tokenizer are
473             relatively stable. At the time of writing 0.33 is the latest release
474             and it's compatable with brains down to at least 0.17. If you're
475             upgrading and there isn't a big notice about the storage format being
476             incompatable in the F file your old brains will probably work
477             just fine.
478              
479             =head1 ATTRIBUTES
480              
481             =head2 C
482              
483             The name of the brain (file name, database name) to use as storage.
484             There is no default. Whether this gets used at all depends on the
485             storage backend, currently only SQLite uses it.
486              
487             =head2 C
488              
489             A boolean value indicating whether Hailo should save its state before
490             its object gets destroyed. This defaults to true and will simply call
491             L at C time.
492              
493             See L for how the SQLite backend
494             uses this option.
495              
496             =head2 C
497              
498             The Markov order (chain length) you want to use for an empty brain.
499             The default is 2.
500              
501             =head2 C
502              
503             =head2 C
504              
505             =head2 C
506              
507             =head2 C
508              
509             A a short name name of the class we use for the engine, storage,
510             tokenizer or ui backends.
511              
512             By default this is B for the engine, B for storage,
513             B for the tokenizer and B for the UI. The UI backend
514             is only used by the L command-line interface.
515              
516             You can only specify the short name of one of the packages Hailo
517             itself ships with. If you need another class then just prefix the
518             package with a plus (Catalyst style), e.g. C<+My::Foreign::Tokenizer>.
519              
520             =head2 C
521              
522             =head2 C
523              
524             =head2 C
525              
526             =head2 C
527              
528             A C of arguments for engine/storage/tokenizer/ui
529             backends. See the documentation for the backends for what sort of
530             arguments they accept.
531              
532             =head1 METHODS
533              
534             =head2 C
535              
536             This is the constructor. It accepts the attributes specified in
537             L.
538              
539             =head2 C
540              
541             Takes a string or an array reference of strings and learns from them.
542              
543             =head2 C
544              
545             Takes a filename, filehandle or array reference and learns from all its
546             lines. If a filename is passed, the file is assumed to be UTF-8 encoded.
547             Unlike L|/learn>, this method sacrifices some safety (disables
548             the database journal, fsyncs, etc) for speed while learning.
549              
550             You can prove a second parameter which, if true, will use aggressive
551             caching while training, which will speed things up considerably for large
552             inputs, but will take up quite a bit of memory.
553              
554             =head2 C
555              
556             Takes an optional line of text and generates a reply that might be relevant.
557              
558             =head2 C
559              
560             Takes a string argument, learns from it, and generates a reply that
561             might be relevant. This is equivalent to calling L
562             followed by L.
563              
564             =head2 C
565              
566             Tells the underlying storage backend to L
567             state|Hailo::Role::Storage/"save">, any arguments to this method will
568             be passed as-is to the backend.
569              
570             =head2 C
571              
572             Takes no arguments. Returns the number of tokens, expressions, previous
573             token links and next token links.
574              
575             =head1 SUPPORT
576              
577             You can join the IRC channel I<#hailo> on FreeNode if you have questions.
578              
579             =head1 BUGS
580              
581             Bugs, feature requests and other issues are tracked in L
582             rt.cpan.org|https://rt.cpan.org/Dist/Display.html?Name=Hailo>
583              
584             =head1 SEE ALSO
585              
586             =over
587              
588             =item * L - A non-blocking POE wrapper around Hailo
589              
590             =item * L - A Hailo IRC bot plugin
591              
592             =item * L - Failo, an IRC bot that uses Hailo
593              
594             =item * L - GumbyBRAIN, a more famous IRC bot that uses Hailo
595              
596             =item * L - A L and jQuery powered web
597             interface to Hailo available at L
598             and as L on
599             L
600              
601             =item * L, a random tweet generator powered by Hailo
602              
603             =item * L - cobe, a Python port of MegaHAL "inspired by the success of Hailo"
604              
605             =back
606              
607             =head1 LINKS
608              
609             =over
610              
611             =item * L - Hailo's website
612              
613             =item * L - Hailo: A Perl rewrite of
614             MegaHAL, A blog posting about the motivation behind Hailo
615              
616             =item * L -
617             More blog posts about Hailo on Evar ArnfjErE
618             Bjarmason's L blog
619              
620             =item * Hailo on L and
621             L
622              
623             =back
624              
625             =head1 AUTHORS
626              
627             Hinrik Ern SigurEsson, hinrik.sig@gmail.com
628              
629             Evar ArnfjErE Bjarmason
630              
631             =head1 LICENSE AND COPYRIGHT
632              
633             Copyright 2010 Hinrik Ern SigurEsson and
634             Evar ArnfjErE Bjarmason
635              
636             This program is free software, you can redistribute it and/or modify
637             it under the same terms as Perl itself.
638              
639             =cut