File Coverage

blib/lib/Chatbot/Eliza.pm
Criterion Covered Total %
statement 136 235 57.8
branch 36 88 40.9
condition 11 30 36.6
subroutine 11 15 73.3
pod 6 6 100.0
total 200 374 53.4


line stmt bran cond sub pod time code
1             ###################################################################
2              
3             package Chatbot::Eliza;
4             $Chatbot::Eliza::VERSION = '1.07';
5             # Copyright (c) 1997-2003 John Nolan. All rights reserved.
6             # This program is free software. You may modify and/or
7             # distribute it under the same terms as Perl itself.
8             # This copyright notice must remain attached to the file.
9             #
10             # You can run this file through either pod2man or pod2html
11             # to produce pretty documentation in manual or html file format
12             # (these utilities are part of the Perl 5 distribution).
13             #
14             # POD documentation is distributed throughout the actual code
15             # so that it also functions as comments.
16              
17             require 5.006;
18 2     2   1088 use strict;
  2         2  
  2         46  
19 2     2   6 use warnings;
  2         2  
  2         48  
20 2     2   11 use Carp;
  2         2  
  2         4828  
21              
22             our $AUTOLOAD;
23              
24              
25              
26             ####################################################################
27             # ---{ B E G I N P O D D O C U M E N T A T I O N }--------------
28             #
29              
30             =head1 NAME
31              
32             B - A clone of the classic Eliza program
33              
34             =head1 SYNOPSIS
35              
36             use Chatbot::Eliza;
37              
38             $mybot = new Chatbot::Eliza;
39             $mybot->command_interface;
40              
41             # see below for details
42              
43              
44             =head1 DESCRIPTION
45              
46             This module implements the classic Eliza algorithm.
47             The original Eliza program was written by Joseph
48             Weizenbaum and described in the Communications
49             of the ACM in 1966. Eliza is a mock Rogerian
50             psychotherapist. It prompts for user input,
51             and uses a simple transformation algorithm
52             to change user input into a follow-up question.
53             The program is designed to give the appearance
54             of understanding.
55              
56             This program is a faithful implementation of the program
57             described by Weizenbaum. It uses a simplified script
58             language (devised by Charles Hayden). The content
59             of the script is the same as Weizenbaum's.
60              
61             This module encapsulates the Eliza algorithm
62             in the form of an object. This should make
63             the functionality easy to incorporate in larger programs.
64              
65              
66             =head1 INSTALLATION
67              
68             The current version of Chatbot::Eliza.pm is available on CPAN:
69              
70             http://www.perl.com/CPAN/modules/by-module/Chatbot/
71              
72             To install this package, just change to the directory which
73             you created by untarring the package, and type the following:
74              
75             perl Makefile.PL
76             make test
77             make
78             make install
79              
80             This will copy Eliza.pm to your perl library directory for
81             use by all perl scripts. You probably must be root to do this,
82             unless you have installed a personal copy of perl.
83              
84              
85             =head1 USAGE
86              
87             This is all you need to do to launch a simple
88             Eliza session:
89              
90             use Chatbot::Eliza;
91              
92             $mybot = new Chatbot::Eliza;
93             $mybot->command_interface;
94              
95             You can also customize certain features of the
96             session:
97              
98             $myotherbot = new Chatbot::Eliza;
99              
100             $myotherbot->name( "Hortense" );
101             $myotherbot->debug( 1 );
102              
103             $myotherbot->command_interface;
104              
105             These lines set the name of the bot to be
106             "Hortense" and turn on the debugging output.
107              
108             When creating an Eliza object, you can specify
109             a name and an alternative scriptfile:
110              
111             $bot = new Chatbot::Eliza "Brian", "myscript.txt";
112              
113             You can also use an anonymous hash to set these parameters.
114             Any of the fields can be initialized using this syntax:
115              
116             $bot = new Chatbot::Eliza {
117             name => "Brian",
118             scriptfile => "myscript.txt",
119             debug => 1,
120             prompts_on => 1,
121             memory_on => 0,
122             myrand =>
123             sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); },
124             };
125              
126             If you don't specify a script file, then the new object will be
127             initialized with a default script. The module contains this
128             script within itself.
129              
130             You can use any of the internal functions in
131             a calling program. The code below takes an
132             arbitrary string and retrieves the reply from
133             the Eliza object:
134              
135             my $string = "I have too many problems.";
136             my $reply = $mybot->transform( $string );
137              
138             You can easily create two bots, each with a different
139             script, and see how they interact:
140              
141             use Chatbot::Eliza
142              
143             my ($harry, $sally, $he_says, $she_says);
144              
145             $sally = new Chatbot::Eliza "Sally", "histext.txt";
146             $harry = new Chatbot::Eliza "Harry", "hertext.txt";
147              
148             $he_says = "I am sad.";
149              
150             # Seed the random number generator.
151             srand( time ^ ($$ + ($$ << 15)) );
152              
153             while (1) {
154             $she_says = $sally->transform( $he_says );
155             print $sally->name, ": $she_says \n";
156              
157             $he_says = $harry->transform( $she_says );
158             print $harry->name, ": $he_says \n";
159             }
160              
161             Mechanically, this works well. However, it critically depends
162             on the actual script data. Having two mock Rogerian therapists
163             talk to each other usually does not produce any sensible conversation,
164             of course.
165              
166             After each call to the transform() method, the debugging output
167             for that transformation is stored in a variable called $debug_text.
168              
169             my $reply = $mybot->transform( "My foot hurts" );
170             my $debugging = $mybot->debug_text;
171              
172             This feature always available, even if the instance's $debug
173             variable is set to 0.
174              
175             Calling programs can specify their own random-number generators.
176             Use this syntax:
177              
178             $chatbot = new Chatbot::Eliza;
179             $chatbot->myrand(
180             sub {
181             #function goes here!
182             }
183             );
184              
185             The custom random function should have the same prototype
186             as perl's built-in rand() function. That is, it should take
187             a single (numeric) expression as a parameter, and it should
188             return a floating-point value between 0 and that number.
189              
190             What this code actually does is pass a reference to an anonymous
191             subroutine ("code reference"). Make sure you've read the perlref
192             manpage for details on how code references actually work.
193              
194             If you don't specify any custom rand function, then the Eliza
195             object will just use the built-in rand() function.
196              
197             =head1 MAIN DATA MEMBERS
198              
199             Each Eliza object uses the following data structures
200             to hold the script data in memory:
201              
202             =head2 %decomplist
203              
204             I: the set of keywords; I: strings containing
205             the decomposition rules.
206              
207             =head2 %reasmblist
208              
209             I: a set of values which are each the join
210             of a keyword and a corresponding decomposition rule;
211             I: the set of possible reassembly statements
212             for that keyword and decomposition rule.
213              
214             =head2 %reasmblist_for_memory
215              
216             This structure is identical to C<%reasmblist>, except
217             that these rules are only invoked when a user comment
218             is being retrieved from memory. These contain comments
219             such as "Earlier you mentioned that...," which are only
220             appropriate for remembered comments. Rules in the script
221             must be specially marked in order to be included
222             in this list rather than C<%reasmblist>. The default
223             script only has a few of these rules.
224              
225             =head2 @memory
226              
227             A list of user comments which an Eliza instance is remembering
228             for future use. Eliza does not remember everything, only some things.
229             In this implementation, Eliza will only remember comments
230             which match a decomposition rule which actually has reassembly
231             rules that are marked with the keyword "reasm_for_memory"
232             rather than the normal "reasmb". The default script
233             only has a few of these.
234              
235             =head2 %keyranks
236              
237             I: the set of keywords; I: the ranks for each keyword
238              
239             =head2 @quit
240              
241             "quit" words -- that is, words the user might use
242             to try to exit the program.
243              
244             =head2 @initial
245              
246             Possible greetings for the beginning of the program.
247              
248             =head2 @final
249              
250             Possible farewells for the end of the program.
251              
252             =head2 %pre
253              
254             I: words which are replaced before any transformations;
255             I: the respective replacement words.
256              
257             =head2 %post
258              
259             I: words which are replaced after the transformations
260             and after the reply is constructed; I: the respective
261             replacement words.
262              
263             =head2 %synon
264              
265             I: words which are found in decomposition rules;
266             I: words which are treated just like their
267             corresponding synonyms during matching of decomposition
268             rules.
269              
270             =head2 Other data members
271              
272             There are several other internal data members. Hopefully
273             these are sufficiently obvious that you can learn about them
274             just by reading the source code.
275              
276             =cut
277              
278              
279             my %fields = (
280             name => 'Eliza',
281             scriptfile => '',
282              
283             debug => 0,
284             debug_text => '',
285             transform_text => '',
286             prompts_on => 1,
287             memory_on => 1,
288             botprompt => '',
289             userprompt => '',
290              
291             myrand =>
292             sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); },
293              
294             keyranks => undef,
295             decomplist => undef,
296             reasmblist => undef,
297             reasmblist_for_memory => undef,
298              
299             pre => undef,
300             post => undef,
301             synon => undef,
302             initial => undef,
303             final => undef,
304             quit => undef,
305              
306             max_memory_size => 5,
307             likelihood_of_using_memory => 1,
308             memory => undef,
309             );
310              
311              
312             ####################################################################
313             # ---{ B E G I N M E T H O D S }----------------------------------
314             #
315              
316             =head1 METHODS
317              
318             =head2 new()
319              
320             my $chatterbot = new Chatbot::Eliza;
321              
322             new() creates a new Eliza object. This method
323             also calls the internal _initialize() method, which in turn
324             calls the parse_script_data() method, which initializes
325             the script data.
326              
327             my $chatterbot = new Chatbot::Eliza 'Ahmad', 'myfile.txt';
328              
329             The eliza object defaults to the name "Eliza", and it
330             contains default script data within itself. However,
331             using the syntax above, you can specify an alternative
332             name and an alternative script file.
333              
334             See the method parse_script_data(). for a description
335             of the format of the script file.
336              
337             =cut
338              
339             sub new {
340 1     1 1 205 my ($that,$name,$scriptfile) = @_;
341 1   33     7 my $class = ref($that) || $that;
342 1         21 my $self = {
343             _permitted => \%fields,
344             %fields,
345             };
346 1         3 bless $self, $class;
347 1         4 $self->_initialize($name,$scriptfile);
348 1         2 return $self;
349             } # end method new
350              
351             sub _initialize {
352 1     1   2 my ($self,$param1,$param2) = @_;
353              
354 1 50 33     5 if (defined $param1 and ref $param1 eq "HASH") {
355              
356             # Allow the calling program to pass in intial parameters
357             # as an anonymous hash
358 0         0 map { $self->{$_} = $param1->{$_}; } keys %$param1;
  0         0  
359              
360 0         0 $self->parse_script_data( $self->{scriptfile} );
361              
362             } else {
363 1 50       12 $self->name($param1) if $param1;
364 1         4 $self->parse_script_data($param2);
365             }
366              
367             # Initialize the memory array ref at instantiation time,
368             # rather than at class definition time.
369             # (THANKS to Randal Schwartz and Robert Chin for fixing this bug.)
370             #
371 1         2 $self->{memory} = [];
372             }
373              
374             sub AUTOLOAD {
375 53     53   36 my $self = shift;
376 53   33     70 my $class = ref($self) || croak "$self is not an object : $!\n";
377 53         36 my $field = $AUTOLOAD;
378 53         111 $field =~ s/.*://; # Strip fully-qualified portion
379              
380 53 50       79 unless (exists $self->{"_permitted"}->{$field} ) {
381 0         0 croak "Can't access `$field' field in object of class $class : $!\n";
382             }
383              
384 53 100       59 if (@_) {
385 25         35 return $self->{$field} = shift;
386             } else {
387 28         109 return $self->{$field};
388             }
389             } # end method AUTOLOAD
390              
391              
392             ####################################################################
393             # --- command_interface ---
394              
395             =head2 command_interface()
396              
397             $chatterbot->command_interface;
398              
399             command_interface() opens an interactive session with
400             the Eliza object, just like the original Eliza program.
401              
402             If you want to design your own session format, then
403             you can write your own while loop and your own functions
404             for prompting for and reading user input, and use the
405             transform() method to generate Eliza's responses.
406             (I: you do not need to invoke preprocess()
407             and postprocess() directly, because these are invoked
408             from within the transform() method.)
409              
410             But if you're lazy and you want to skip all that,
411             then just use command_interface(). It's all done for you.
412              
413             During an interactive session invoked using command_interface(),
414             you can enter the word "debug" to toggle debug mode on and off.
415             You can also enter the keyword "memory" to invoke the _debug_memory()
416             method and print out the contents of the Eliza instance's memory.
417              
418             =cut
419              
420             sub command_interface {
421 0     0 1 0 my $self = shift;
422 0         0 my ($user_input, $previous_user_input, $reply);
423              
424 0         0 $user_input = "";
425              
426 0         0 $self->botprompt($self->name . ":\t"); # Eliza's prompt
427 0         0 $self->userprompt("you:\t"); # User's prompt
428              
429             # Seed the random number generator.
430 0         0 srand( time() ^ ($$ + ($$ << 15)) );
431              
432             # Print the Eliza prompt
433 0 0       0 print $self->botprompt if $self->prompts_on;
434              
435             # Print an initial greeting
436 0         0 print "$self->{initial}->[ int &{$self->{myrand}}( scalar @{ $self->{initial} } ) ]\n";
  0         0  
  0         0  
437              
438              
439             ###################################################################
440             # command loop. This loop should go on forever,
441             # until we explicity break out of it.
442             #
443 0         0 while (1) {
444              
445 0 0       0 print $self->userprompt if $self->prompts_on;
446              
447 0         0 $previous_user_input = $user_input;
448 0         0 chomp( $user_input = );
449              
450              
451             # If the user wants to quit,
452             # print out a farewell and quit.
453 0 0       0 if ($self->_testquit($user_input) ) {
454 0         0 $reply = "$self->{final}->[ int &{$self->{myrand}}( scalar @{$self->{final}} ) ]";
  0         0  
  0         0  
455 0 0       0 print $self->botprompt if $self->prompts_on;
456 0         0 print "$reply\n";
457 0         0 last;
458             }
459              
460             # If the user enters the word "debug",
461             # then toggle on/off this Eliza's debug output.
462 0 0       0 if ($user_input eq "debug") {
463 0         0 $self->debug( ! $self->debug );
464 0         0 $user_input = $previous_user_input;
465             }
466              
467             # If the user enters the word "memory",
468             # then use the _debug_memory method to dump out
469             # the current contents of Eliza's memory
470 0 0 0     0 if ($user_input eq "memory" or $user_input eq "debug memory") {
471 0         0 print $self->_debug_memory();
472 0         0 redo;
473             }
474              
475             # If the user enters the word "debug that",
476             # then dump out the debugging of the
477             # most recent call to transform.
478 0 0       0 if ($user_input eq "debug that") {
479 0         0 print $self->debug_text();
480 0         0 redo;
481             }
482              
483             # Invoke the transform method
484             # to generate a reply.
485 0         0 $reply = $self->transform( $user_input );
486              
487              
488             # Print out the debugging text if debugging is set to on.
489             # This variable should have been set by the transform method.
490 0 0       0 print $self->debug_text if $self->debug;
491              
492             # Print the actual reply
493 0 0       0 print $self->botprompt if $self->prompts_on;
494 0         0 print "$reply\n";
495              
496             } # End UI command loop.
497              
498              
499             } # End method command_interface
500              
501              
502             ####################################################################
503             # --- preprocess ---
504              
505             =head2 preprocess()
506              
507             $string = preprocess($string);
508              
509             preprocess() applies simple substitution rules to the input string.
510             Mostly this is to catch varieties in spelling, misspellings,
511             contractions and the like.
512              
513             preprocess() is called from within the transform() method.
514             It is applied to user-input text, BEFORE any processing,
515             and before a reassebly statement has been selected.
516              
517             It uses the array C<%pre>, which is created
518             during the parse of the script.
519              
520             =cut
521              
522             sub preprocess {
523 4     4 1 5 my ($self,$string) = @_;
524              
525 4         6 my ($i, @wordsout, @wordsin, $keyword);
526              
527 4         13 @wordsout = @wordsin = split / /, $string;
528              
529 4         10 WORD: for ($i = 0; $i < @wordsin; $i++) {
530 8         5 foreach $keyword (keys %{ $self->{pre} }) {
  8         20  
531 0 0       0 if ($wordsin[$i] =~ /\b$keyword\b/i ) {
532 0         0 ($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{pre}->{$keyword}/ig;
533 0         0 next WORD;
534             }
535             }
536             }
537 4         11 return join ' ', @wordsout;
538             }
539              
540              
541             ####################################################################
542             # --- postprocess ---
543              
544             =head2 postprocess()
545              
546             $string = postprocess($string);
547              
548             postprocess() applies simple substitution rules to the
549             reassembly rule. This is where all the "I"'s and "you"'s
550             are exchanged. postprocess() is called from within the
551             transform() function.
552              
553             It uses the array C<%post>, created
554             during the parse of the script.
555              
556             =cut
557              
558             sub postprocess {
559 27     27 1 25 my ($self,$string) = @_;
560              
561 27         14 my ($i, @wordsout, @wordsin, $keyword);
562              
563 27         33 @wordsin = @wordsout = split (/ /, $string);
564              
565 27         43 WORD: for ($i = 0; $i < @wordsin; $i++) {
566 5         2 foreach $keyword (keys %{ $self->{post} }) {
  5         13  
567 0 0       0 if ($wordsin[$i] =~ /\b$keyword\b/i ) {
568 0         0 ($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{post}->{$keyword}/ig;
569 0         0 next WORD;
570             }
571             }
572             }
573 27         38 return join ' ', @wordsout;
574             }
575              
576             ####################################################################
577             # --- _testquit ---
578              
579             =head2 _testquit()
580              
581             if ($self->_testquit($user_input) ) { ... }
582              
583             _testquit() detects words like "bye" and "quit" and returns
584             true if it finds one of them as the first word in the sentence.
585              
586             These words are listed in the script, under the keyword "quit".
587              
588             =cut
589              
590             sub _testquit {
591 0     0   0 my ($self,$string) = @_;
592              
593 0         0 my ($quitword, @wordsin);
594              
595 0         0 foreach $quitword (@{ $self->{quit} }) {
  0         0  
596 0 0       0 return 1 if ($string =~ /\b$quitword\b/i ) ;
597             }
598             }
599              
600              
601             ####################################################################
602             # --- _debug_memory ---
603              
604             =head2 _debug_memory()
605              
606             $self->_debug_memory()
607              
608             _debug_memory() is a special function which returns
609             the contents of Eliza's memory stack.
610              
611              
612             =cut
613              
614             sub _debug_memory {
615              
616 0     0   0 my ($self) = @_;
617              
618 0         0 my $string = "\t";
619 0         0 $string .= $#{ $self->memory } + 1;
  0         0  
620 0         0 $string .= " item(s) in memory stack:\n";
621              
622             # [THANKS to Roy Stephan for helping me adjust this bit]
623             #
624 0         0 foreach (@{ $self->memory } ) {
  0         0  
625              
626 0         0 my $line = $_;
627 0         0 $string .= sprintf "\t\t->$line\n" ;
628             };
629              
630 0         0 return $string;
631             }
632              
633             ####################################################################
634             # --- transform ---
635              
636             =head2 transform()
637              
638             $reply = $chatterbot->transform( $string, $use_memory );
639              
640             transform() applies transformation rules to the user input
641             string. It invokes preprocess(), does transformations,
642             then invokes postprocess(). It returns the tranformed
643             output string, called C<$reasmb>.
644              
645             The algorithm embedded in the transform() method has three main parts:
646              
647             =over
648              
649             =item 1
650              
651             Search the input string for a keyword.
652              
653             =item 2
654              
655             If we find a keyword, use the list of decomposition rules
656             for that keyword, and pattern-match the input string against
657             each rule.
658              
659             =item 3
660              
661             If the input string matches any of the decomposition rules,
662             then randomly select one of the reassembly rules for that
663             decomposition rule, and use it to construct the reply.
664              
665             =back
666              
667             transform() takes two parameters. The first is the string we want
668             to transform. The second is a flag which indicates where this sting
669             came from. If the flag is set, then the string has been pulled
670             from memory, and we should use reassembly rules appropriate
671             for that. If the flag is not set, then the string is the most
672             recent user input, and we can use the ordinary reassembly rules.
673              
674             The memory flag is only set when the transform() function is called
675             recursively. The mechanism for setting this parameter is
676             embedded in the transoform method itself. If the flag is set
677             inappropriately, it is ignored.
678              
679             =cut
680              
681             sub transform{
682 4     4 1 662 my ($self,$string,$use_memory) = @_;
683              
684             # Initialize the debugging text buffer.
685 4         16 $self->debug_text('');
686              
687 4 50       8 $self->debug_text(sprintf "\t[Pulling string \"$string\" from memory.]\n")
688             if $use_memory;
689              
690 4         2 my ($i, @string_parts, $string_part, $rank, $goto, $reasmb, $keyword,
691             $decomp, $this_decomp, $reasmbkey, @these_reasmbs,
692             @decomp_matches, $synonyms, $synonym_index);
693              
694             # Default to a really low rank.
695 4         5 $rank = -2;
696 4         3 $reasmb = "";
697 4         3 $goto = "";
698              
699             # First run the string through the preprocessor.
700 4         6 $string = $self->preprocess( $string );
701              
702             # Convert punctuation to periods. We will assume that commas
703             # and certain conjunctions separate distinct thoughts/sentences.
704 4         6 $string =~ s/[?!,]/./g;
705 4         5 $string =~ s/but/./g; # Yikes! This is English-specific.
706              
707             # Split the string by periods into an array
708 4         6 @string_parts = split /\./, $string ;
709              
710             # Examine each part of the input string in turn.
711 4         5 STRING_PARTS: foreach $string_part (@string_parts) {
712              
713             # Run through the whole list of keywords.
714 4         4 KEYWORD: foreach $keyword (keys %{ $self->{decomplist} }) {
  4         9  
715              
716             # Check to see if the input string contains a keyword
717             # which outranks any we have found previously
718             # (On first loop, rank is set to -2.)
719 12 100 66     133 if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto)
      66        
720             and
721             $rank < $self->{keyranks}->{$keyword}
722             )
723             {
724             # If we find one, then set $rank to equal
725             # the rank of that keyword.
726 3         5 $rank = $self->{keyranks}->{$keyword};
727              
728 3         10 $self->debug_text($self->debug_text . sprintf "\t$rank> $keyword");
729              
730             # Now let's check all the decomposition rules for that keyword.
731 3         3 DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} }) {
  3         5  
732              
733             # Change '*' to '\b(.*)\b' in this decomposition rule,
734             # so we can use it for regular expressions. Later,
735             # we will want to isolate individual matches to each wildcard.
736 3         14 ($this_decomp = $decomp) =~ s/\s*\*\s*/\\b\(\.\*\)\\b/g;
737              
738             # If this docomposition rule contains a word which begins with '@',
739             # then the script also contained some synonyms for that word.
740             # Find them all using %synon and generate a regular expression
741             # containing all of them.
742 3 50       6 if ($this_decomp =~ /\@/ ) {
743 0         0 ($synonym_index = $this_decomp) =~ s/.*\@(\w*).*/$1/i ;
744 0         0 $synonyms = join ('|', @{ $self->{synon}->{$synonym_index} });
  0         0  
745 0         0 $this_decomp =~ s/(.*)\@$synonym_index(.*)/$1($synonym_index\|$synonyms)$2/g;
746             }
747              
748 3         9 $self->debug_text($self->debug_text . sprintf "\n\t\t: $decomp");
749              
750             # Using the regular expression we just generated,
751             # match against the input string. Use empty "()"'s to
752             # eliminate warnings about uninitialized variables.
753 3 50       46 if ($string_part =~ /$this_decomp()()()()()()()()()()/i) {
754              
755             # If this decomp rule matched the string,
756             # then create an array, so that we can refer to matches
757             # to individual wildcards. Use '0' as a placeholder
758             # (we don't want to refer to any "zeroth" wildcard).
759 3         19 @decomp_matches = ("0", $1, $2, $3, $4, $5, $6, $7, $8, $9);
760 3         9 $self->debug_text($self->debug_text . sprintf " : @decomp_matches\n");
761              
762             # Using the keyword and the decomposition rule,
763             # reconstruct a key for the list of reassamble rules.
764 3         5 $reasmbkey = join ($;,$keyword,$decomp);
765              
766             # Get the list of possible reassembly rules for this key.
767             #
768 3   33     7 my $memory = (defined $use_memory and $#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0);
769              
770             # Pick out next reassembly rule.
771 3         5 $reasmb = $self->_get_next_reasmb( $reasmbkey, $memory);
772              
773 3         8 $self->debug_text($self->debug_text . sprintf "\t\t--> $reasmb\n");
774              
775             # If the reassembly rule we picked contains the word "goto",
776             # then we start over with a new keyword. Set $keyword to equal
777             # that word, and start the whole loop over.
778 3 50       8 if ($reasmb =~ m/^goto\s(\w*).*/i) {
779 0         0 $self->debug_text($self->debug_text . sprintf "\$1 = $1\n");
780 0         0 $goto = $keyword = $1;
781 0         0 $rank = -2;
782 0         0 redo KEYWORD;
783             }
784              
785             # Otherwise, using the matches to wildcards which we stored above,
786             # insert words from the input string back into the reassembly rule.
787             # [THANKS to Gidon Wise for submitting a bugfix here]
788 3         13 for ($i=1; $i <= $#decomp_matches; $i++) {
789 27         31 $decomp_matches[$i] = $self->postprocess( $decomp_matches[$i] );
790 27         62 $decomp_matches[$i] =~ s/([,;?!]|\.*)$//;
791 27         178 $reasmb =~ s/\($i\)/$decomp_matches[$i]/g;
792             }
793              
794             # Move on to the next keyword. If no other keywords match,
795             # then we'll end up actually using the $reasmb string
796             # we just generated above.
797 3         8 next KEYWORD ;
798              
799             } # End if ($string_part =~ /$this_decomp/i)
800              
801 0         0 $self->debug_text($self->debug_text . sprintf "\n");
802              
803             } # End DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} })
804              
805             } # End if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto)
806              
807             } # End KEYWORD: foreach $keyword (keys %{ $self->{decomplist})
808            
809             } # End STRING_PARTS: foreach $string_part (@string_parts) {
810              
811             =head2 How memory is used
812              
813             In the script, some reassembly rules are special. They are marked with
814             the keyword "reasm_for_memory", rather than just "reasm".
815             Eliza "remembers" any comment when it matches a docomposition rule
816             for which there are any reassembly rules for memory.
817             An Eliza object remembers up to C<$max_memory_size> (default: 5)
818             user input strings.
819              
820             If, during a subsequent run, the transform() method fails to find any
821             appropriate decomposition rule for a user's comment, and if there are
822             any comments inside the memory array, then Eliza may elect to ignore
823             the most recent comment and instead pull out one of the strings from memory.
824             In this case, the transform method is called recursively with the memory flag.
825              
826             Honestly, I am not sure exactly how this memory functionality
827             was implemented in the original Eliza program. Hopefully
828             this implementation is not too far from Weizenbaum's.
829              
830             If you don't want to use the memory functionality at all,
831             then you can disable it:
832              
833             $mybot->memory_on(0);
834              
835             You can also achieve the same effect by making sure
836             that the script data does not contain any reassembly rules
837             marked with the keyword "reasm_for_memory". The default
838             script data only has 4 such items.
839              
840             =cut
841              
842 4 100       15 if ($reasmb eq "") {
    50          
843              
844             # If all else fails, call this method recursively
845             # and make sure that it has something to parse.
846             # Use a string from memory if anything is available.
847             #
848             # $self-likelihood_of_using_memory should be some number
849             # between 1 and 0; it defaults to 1.
850             #
851 1 50 33     2 if (
852 1         4 $#{ $self->memory } >= 0
853             and
854 0         0 &{$self->{myrand}}(1) >= 1 - $self->likelihood_of_using_memory
855             ) {
856              
857 0         0 $reasmb = $self->transform( shift @{ $self->memory }, "use memory" );
  0         0  
858              
859             } else {
860 1         7 $reasmb = $self->transform("xnone");
861             }
862              
863             } elsif ($self->memory_on) {
864              
865             # If memory is switched on, then we handle memory.
866              
867             # Now that we have successfully transformed this string,
868             # push it onto the end of the memory stack... unless, of course,
869             # that's where we got it from in the first place, or if the rank
870             # is not the kind we remember.
871             #
872 3 50 33     3 if (
873 3         13 $#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0
874             and
875             not defined $use_memory
876             ) {
877              
878 0         0 push @{ $self->memory },$string ;
  0         0  
879             }
880              
881             # Shift out the least-recent item from the bottom
882             # of the memory stack if the stack exceeds the max size.
883 3 50       2 shift @{ $self->memory } if $#{ $self->memory } >= $self->max_memory_size;
  0         0  
  3         10  
884              
885             $self->debug_text($self->debug_text
886 3         8 . sprintf("\t%d item(s) in memory.\n", $#{ $self->memory } + 1 ) ) ;
  3         7  
887              
888             } # End if ($reasmb eq "")
889              
890 4         7 $reasmb =~ tr/ / /s; # Eliminate any duplicate space characters.
891 4         3 $reasmb =~ s/[ ][?]$/?/; # Eliminate any spaces before the question mark.
892              
893             # Save the return string so that forgetful calling programs
894             # can ask the bot what the last reply was.
895 4         10 $self->transform_text($reasmb);
896              
897 4         13 return $reasmb ;
898             }
899              
900             # _get_next_reasmb( $key, $memory_flag )
901             #
902             # Given a key to a reasmb list and a flag indicating whether the list should
903             # be pulled from a memory list or standard script list, returns the
904             # next reasmb in the list, wrapping back to the start if the last one
905             # is reached.
906             sub _get_next_reasmb {
907 3     3   3 my ( $self, $reasmbkey, $memory ) = @_;
908              
909 3 50       6 my $for_memory = $memory ? '_for_memory' : '';
910 3         2 my @these_reasmbs = @{ $self->{"reasmblist$for_memory"}->{$reasmbkey} };
  3         7  
911 3         8 my $next_reasmb = $self->{"next_reasmblist$for_memory"}->{$reasmbkey}++;
912 3 50       5 if ( $next_reasmb > scalar( @these_reasmbs ) ) {
913 0         0 $next_reasmb = 1;
914 0         0 $self->{"next_reasmblist$for_memory"}->{$reasmbkey} = 0;
915             }
916              
917 3         7 return $these_reasmbs[$next_reasmb - 1];
918             }
919              
920             ####################################################################
921             # --- parse_script_data ---
922              
923             =head2 parse_script_data()
924              
925             $self->parse_script_data;
926             $self->parse_script_data( $script_file );
927              
928             parse_script_data() is invoked from the _initialize() method,
929             which is called from the new() function. However, you can also
930             call this method at any time against an already-instantiated
931             Eliza instance. In that case, the new script data is I
932             to the old script data. The old script data is not deleted.
933              
934             You can pass a parameter to this function, which is the name of the
935             script file, and it will read in and parse that file.
936             If you do not pass any parameter to this method, then
937             it will read the data embedded at the end of the module as its
938             default script data.
939              
940             If you pass the name of a script file to parse_script_data(),
941             and that file is not available for reading, then the module dies.
942              
943              
944             =head1 Format of the script file
945              
946             This module includes a default script file within itself,
947             so it is not necessary to explicitly specify a script file
948             when instantiating an Eliza object.
949              
950             Each line in the script file can specify a key,
951             a decomposition rule, or a reassembly rule.
952              
953             key: remember 5
954             decomp: * i remember *
955             reasmb: Do you often think of (2) ?
956             reasmb: Does thinking of (2) bring anything else to mind ?
957             decomp: * do you remember *
958             reasmb: Did you think I would forget (2) ?
959             reasmb: What about (2) ?
960             reasmb: goto what
961             pre: equivalent alike
962             synon: belief feel think believe wish
963              
964             The number after the key specifies the rank.
965             If a user's input contains the keyword, then
966             the transform() function will try to match
967             one of the decomposition rules for that keyword.
968             If one matches, then it will select one of
969             the reassembly rules at random. The number
970             (2) here means "use whatever set of words
971             matched the second asterisk in the decomposition
972             rule."
973              
974             If you specify a list of synonyms for a word,
975             the you should use a "@" when you use that
976             word in a decomposition rule:
977              
978             decomp: * i @belief i *
979             reasmb: Do you really think so ?
980             reasmb: But you are not sure you (3).
981              
982             Otherwise, the script will never check to see
983             if there are any synonyms for that keyword.
984              
985             Reassembly rules should be marked with I
986             rather than I when it is appropriate for use
987             when a user's comment has been extracted from memory.
988              
989             key: my 2
990             decomp: * my *
991             reasm_for_memory: Let's discuss further why your (2).
992             reasm_for_memory: Earlier you said your (2).
993             reasm_for_memory: But your (2).
994             reasm_for_memory: Does that have anything to do with the fact that your (2) ?
995              
996             =head1 How the script file is parsed
997              
998             Each line in the script file contains an "entrytype"
999             (key, decomp, synon) and an "entry", separated by
1000             a colon. In turn, each "entry" can itself be
1001             composed of a "key" and a "value", separated by
1002             a space. The parse_script_data() function
1003             parses each line out, and splits the "entry" and
1004             "entrytype" portion of each line into two variables,
1005             C<$entry> and C<$entrytype>.
1006              
1007             Next, it uses the string C<$entrytype> to determine
1008             what sort of stuff to expect in the C<$entry> variable,
1009             if anything, and parses it accordingly. In some cases,
1010             there is no second level of key-value pair, so the function
1011             does not even bother to isolate or create C<$key> and C<$value>.
1012              
1013             C<$key> is always a single word. C<$value> can be null,
1014             or one single word, or a string composed of several words,
1015             or an array of words.
1016              
1017             Based on all these entries and keys and values,
1018             the function creates two giant hashes:
1019             C<%decomplist>, which holds the decomposition rules for
1020             each keyword, and C<%reasmblist>, which holds the
1021             reassembly phrases for each decomposition rule.
1022             It also creates C<%keyranks>, which holds the ranks for
1023             each key.
1024              
1025             Six other arrays are created: C<%reasm_for_memory, %pre, %post,
1026             %synon, @initial,> and C<@final>.
1027              
1028             =cut
1029              
1030             sub parse_script_data {
1031              
1032 1     1 1 2 my ($self,$scriptfile) = @_;
1033 1         1 my @scriptlines;
1034              
1035 1 50       2 if ($scriptfile) {
1036              
1037             # If we have an external script file, open it
1038             # and read it in (the whole thing, all at once).
1039 1 50       37 open (SCRIPTFILE, "<$scriptfile")
1040             or die "Could not read from file $scriptfile : $!\n";
1041 1         28 @scriptlines = ; # read in script data
1042 1         7 $self->scriptfile($scriptfile);
1043 1         6 close (SCRIPTFILE);
1044              
1045             } else {
1046              
1047             # Otherwise, read in the data from the bottom
1048             # of this file. This data might be read several
1049             # times, so we save the offset pointer and
1050             # reset it when we're done.
1051 0         0 my $where= tell(DATA);
1052 0         0 @scriptlines = ; # read in script data
1053 0         0 seek(DATA, $where, 0);
1054 0         0 $self->scriptfile('');
1055             }
1056              
1057 1         2 my ($entrytype, $entry, $key, $value) ;
1058 1         2 my $thiskey = "";
1059 1         2 my $thisdecomp = "";
1060              
1061             ############################################################
1062             # Examine each line of script data.
1063 1         2 for (@scriptlines) {
1064              
1065             # Skip comments and lines with only whitespace.
1066 9 50 33     44 next if (/^\s*#/ || /^\s*$/);
1067              
1068             # Split entrytype and entry, using a colon as the delimiter.
1069 9         25 ($entrytype, $entry) = $_ =~ m/^\s*(\S*)\s*:\s*(.*)\s*$/;
1070              
1071             # Case loop, based on the entrytype.
1072 9         8 for ($entrytype) {
1073              
1074 9 50       14 /quit/ and do { push @{ $self->{quit} }, $entry; last; };
  0         0  
  0         0  
  0         0  
1075 9 50       11 /initial/ and do { push @{ $self->{initial} }, $entry; last; };
  0         0  
  0         0  
  0         0  
1076 9 50       10 /final/ and do { push @{ $self->{final} }, $entry; last; };
  0         0  
  0         0  
  0         0  
1077              
1078 9 100       13 /decomp/ and do {
1079 3 50       6 die "$0: error parsing script: decomposition rule with no keyword.\n"
1080             if $thiskey eq "";
1081 3         6 $thisdecomp = join($;,$thiskey,$entry);
1082 3         2 push @{ $self->{decomplist}->{$thiskey} }, $entry ;
  3         7  
1083 3         2 last;
1084             };
1085              
1086 6 100       8 /reasmb/ and do {
1087 3 50       4 die "$0: error parsing script: reassembly rule with no decomposition rule.\n"
1088             if $thisdecomp eq "";
1089 3         3 push @{ $self->{reasmblist}->{$thisdecomp} }, $entry ;
  3         6  
1090 3         4 last;
1091             };
1092              
1093 3 50       4 /reasm_for_memory/ and do {
1094 0 0       0 die "$0: error parsing script: reassembly rule with no decomposition rule.\n"
1095             if $thisdecomp eq "";
1096 0         0 push @{ $self->{reasmblist_for_memory}->{$thisdecomp} }, $entry ;
  0         0  
1097 0         0 last;
1098             };
1099              
1100             # The entrytypes below actually expect to see a key and value
1101             # pair in the entry, so we split them out. The first word,
1102             # separated by a space, is the key, and everything else is
1103             # an array of values.
1104              
1105 3         7 ($key,$value) = $entry =~ m/^\s*(\S*)\s*(.*)/;
1106              
1107 3 50       6 /pre/ and do { $self->{pre}->{$key} = $value; last; };
  0         0  
  0         0  
1108 3 50       4 /post/ and do { $self->{post}->{$key} = $value; last; };
  0         0  
  0         0  
1109              
1110             # synon expects an array, so we split $value into an array, using " " as delimiter.
1111 3 50       4 /synon/ and do { $self->{synon}->{$key} = [ split /\ /, $value ]; last; };
  0         0  
  0         0  
1112              
1113 3 50       6 /key/ and do {
1114 3         4 $thiskey = $key;
1115 3         2 $thisdecomp = "";
1116 3         5 $self->{keyranks}->{$thiskey} = $value ;
1117 3         15 last;
1118             };
1119            
1120             } # End for ($entrytype) (case loop)
1121              
1122             } # End for (@scriptlines)
1123              
1124             } # End of method parse_script_data
1125              
1126              
1127             # Eliminate some pesky warnings.
1128             #
1129       0     sub DESTROY {}
1130              
1131              
1132             # ---{ E N D M E T H O D S }----------------------------------
1133             ####################################################################
1134              
1135             1; # Return a true value.
1136              
1137              
1138             =head1 COPYRIGHT AND LICENSE
1139              
1140             This software is copyright (c) 2003 by John Nolan Ejpnolan@sonic.netE.
1141              
1142             This is free software; you can redistribute it and/or modify it under
1143             the same terms as the Perl 5 programming language system itself.
1144              
1145             =head1 AUTHOR
1146              
1147             John Nolan jpnolan@sonic.net January 2003.
1148              
1149             Implements the classic Eliza algorithm by Prof. Joseph Weizenbaum.
1150             Script format devised by Charles Hayden.
1151              
1152             =cut
1153              
1154              
1155              
1156             ####################################################################
1157             # ---{ B E G I N D E F A U L T S C R I P T D A T A }----------
1158             #
1159             # This script was prepared by Chris Hayden. Hayden's Eliza
1160             # program was written in Java, however, it attempted to match
1161             # the functionality of Weizenbaum's original program as closely
1162             # as possible.
1163             #
1164             # Hayden's script format was quite different from Weizenbaum's,
1165             # but it maintained the same content. I have adapted Hayden's
1166             # script format, since it was simple and convenient enough
1167             # for my purposes.
1168             #
1169             # I've made small modifications here and there.
1170             #
1171              
1172             # We use the token __DATA__ rather than __END__,
1173             # so that all this data is visible within the current package.
1174              
1175             __DATA__