| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
################################################################### |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Chatbot::Eliza; |
|
4
|
|
|
|
|
|
|
$Chatbot::Eliza::VERSION = '1.06'; |
|
5
|
|
|
|
|
|
|
# Copyright (c) 1997-2003 John Nolan. All rights reserved. |
|
6
|
|
|
|
|
|
|
# This program is free software. You may modify and/or |
|
7
|
|
|
|
|
|
|
# distribute it under the same terms as Perl itself. |
|
8
|
|
|
|
|
|
|
# This copyright notice must remain attached to the file. |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You can run this file through either pod2man or pod2html |
|
11
|
|
|
|
|
|
|
# to produce pretty documentation in manual or html file format |
|
12
|
|
|
|
|
|
|
# (these utilities are part of the Perl 5 distribution). |
|
13
|
|
|
|
|
|
|
# |
|
14
|
|
|
|
|
|
|
# POD documentation is distributed throughout the actual code |
|
15
|
|
|
|
|
|
|
# so that it also functions as comments. |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
require 5.006; |
|
18
|
2
|
|
|
2
|
|
1837
|
use strict; |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
57
|
|
|
19
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
56
|
|
|
20
|
2
|
|
|
2
|
|
17
|
use Carp; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
6747
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our $AUTOLOAD; |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#################################################################### |
|
27
|
|
|
|
|
|
|
# ---{ B E G I N P O D D O C U M E N T A T I O N }-------------- |
|
28
|
|
|
|
|
|
|
# |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
B - A clone of the classic Eliza program |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
use Chatbot::Eliza; |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
$mybot = new Chatbot::Eliza; |
|
39
|
|
|
|
|
|
|
$mybot->command_interface; |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# see below for details |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
This module implements the classic Eliza algorithm. |
|
47
|
|
|
|
|
|
|
The original Eliza program was written by Joseph |
|
48
|
|
|
|
|
|
|
Weizenbaum and described in the Communications |
|
49
|
|
|
|
|
|
|
of the ACM in 1966. Eliza is a mock Rogerian |
|
50
|
|
|
|
|
|
|
psychotherapist. It prompts for user input, |
|
51
|
|
|
|
|
|
|
and uses a simple transformation algorithm |
|
52
|
|
|
|
|
|
|
to change user input into a follow-up question. |
|
53
|
|
|
|
|
|
|
The program is designed to give the appearance |
|
54
|
|
|
|
|
|
|
of understanding. |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
This program is a faithful implementation of the program |
|
57
|
|
|
|
|
|
|
described by Weizenbaum. It uses a simplified script |
|
58
|
|
|
|
|
|
|
language (devised by Charles Hayden). The content |
|
59
|
|
|
|
|
|
|
of the script is the same as Weizenbaum's. |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
This module encapsulates the Eliza algorithm |
|
62
|
|
|
|
|
|
|
in the form of an object. This should make |
|
63
|
|
|
|
|
|
|
the functionality easy to incorporate in larger programs. |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 INSTALLATION |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
The current version of Chatbot::Eliza.pm is available on CPAN: |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
http://www.perl.com/CPAN/modules/by-module/Chatbot/ |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
To install this package, just change to the directory which |
|
73
|
|
|
|
|
|
|
you created by untarring the package, and type the following: |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
perl Makefile.PL |
|
76
|
|
|
|
|
|
|
make test |
|
77
|
|
|
|
|
|
|
make |
|
78
|
|
|
|
|
|
|
make install |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
This will copy Eliza.pm to your perl library directory for |
|
81
|
|
|
|
|
|
|
use by all perl scripts. You probably must be root to do this, |
|
82
|
|
|
|
|
|
|
unless you have installed a personal copy of perl. |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 USAGE |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
This is all you need to do to launch a simple |
|
88
|
|
|
|
|
|
|
Eliza session: |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
use Chatbot::Eliza; |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
$mybot = new Chatbot::Eliza; |
|
93
|
|
|
|
|
|
|
$mybot->command_interface; |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
You can also customize certain features of the |
|
96
|
|
|
|
|
|
|
session: |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
$myotherbot = new Chatbot::Eliza; |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
$myotherbot->name( "Hortense" ); |
|
101
|
|
|
|
|
|
|
$myotherbot->debug( 1 ); |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
$myotherbot->command_interface; |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
These lines set the name of the bot to be |
|
106
|
|
|
|
|
|
|
"Hortense" and turn on the debugging output. |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
When creating an Eliza object, you can specify |
|
109
|
|
|
|
|
|
|
a name and an alternative scriptfile: |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
$bot = new Chatbot::Eliza "Brian", "myscript.txt"; |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
You can also use an anonymous hash to set these parameters. |
|
114
|
|
|
|
|
|
|
Any of the fields can be initialized using this syntax: |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
$bot = new Chatbot::Eliza { |
|
117
|
|
|
|
|
|
|
name => "Brian", |
|
118
|
|
|
|
|
|
|
scriptfile => "myscript.txt", |
|
119
|
|
|
|
|
|
|
debug => 1, |
|
120
|
|
|
|
|
|
|
prompts_on => 1, |
|
121
|
|
|
|
|
|
|
memory_on => 0, |
|
122
|
|
|
|
|
|
|
myrand => |
|
123
|
|
|
|
|
|
|
sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); }, |
|
124
|
|
|
|
|
|
|
}; |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
If you don't specify a script file, then the new object will be |
|
127
|
|
|
|
|
|
|
initialized with a default script. The module contains this |
|
128
|
|
|
|
|
|
|
script within itself. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
You can use any of the internal functions in |
|
131
|
|
|
|
|
|
|
a calling program. The code below takes an |
|
132
|
|
|
|
|
|
|
arbitrary string and retrieves the reply from |
|
133
|
|
|
|
|
|
|
the Eliza object: |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
my $string = "I have too many problems."; |
|
136
|
|
|
|
|
|
|
my $reply = $mybot->transform( $string ); |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
You can easily create two bots, each with a different |
|
139
|
|
|
|
|
|
|
script, and see how they interact: |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
use Chatbot::Eliza |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
my ($harry, $sally, $he_says, $she_says); |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
$sally = new Chatbot::Eliza "Sally", "histext.txt"; |
|
146
|
|
|
|
|
|
|
$harry = new Chatbot::Eliza "Harry", "hertext.txt"; |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
$he_says = "I am sad."; |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Seed the random number generator. |
|
151
|
|
|
|
|
|
|
srand( time ^ ($$ + ($$ << 15)) ); |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
while (1) { |
|
154
|
|
|
|
|
|
|
$she_says = $sally->transform( $he_says ); |
|
155
|
|
|
|
|
|
|
print $sally->name, ": $she_says \n"; |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
$he_says = $harry->transform( $she_says ); |
|
158
|
|
|
|
|
|
|
print $harry->name, ": $he_says \n"; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Mechanically, this works well. However, it critically depends |
|
162
|
|
|
|
|
|
|
on the actual script data. Having two mock Rogerian therapists |
|
163
|
|
|
|
|
|
|
talk to each other usually does not produce any sensible conversation, |
|
164
|
|
|
|
|
|
|
of course. |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
After each call to the transform() method, the debugging output |
|
167
|
|
|
|
|
|
|
for that transformation is stored in a variable called $debug_text. |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
my $reply = $mybot->transform( "My foot hurts" ); |
|
170
|
|
|
|
|
|
|
my $debugging = $mybot->debug_text; |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
This feature always available, even if the instance's $debug |
|
173
|
|
|
|
|
|
|
variable is set to 0. |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Calling programs can specify their own random-number generators. |
|
176
|
|
|
|
|
|
|
Use this syntax: |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
$chatbot = new Chatbot::Eliza; |
|
179
|
|
|
|
|
|
|
$chatbot->myrand( |
|
180
|
|
|
|
|
|
|
sub { |
|
181
|
|
|
|
|
|
|
#function goes here! |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
); |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
The custom random function should have the same prototype |
|
186
|
|
|
|
|
|
|
as perl's built-in rand() function. That is, it should take |
|
187
|
|
|
|
|
|
|
a single (numeric) expression as a parameter, and it should |
|
188
|
|
|
|
|
|
|
return a floating-point value between 0 and that number. |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
What this code actually does is pass a reference to an anonymous |
|
191
|
|
|
|
|
|
|
subroutine ("code reference"). Make sure you've read the perlref |
|
192
|
|
|
|
|
|
|
manpage for details on how code references actually work. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
If you don't specify any custom rand function, then the Eliza |
|
195
|
|
|
|
|
|
|
object will just use the built-in rand() function. |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head1 MAIN DATA MEMBERS |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Each Eliza object uses the following data structures |
|
200
|
|
|
|
|
|
|
to hold the script data in memory: |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head2 %decomplist |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
I: the set of keywords; I: strings containing |
|
205
|
|
|
|
|
|
|
the decomposition rules. |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 %reasmblist |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
I: a set of values which are each the join |
|
210
|
|
|
|
|
|
|
of a keyword and a corresponding decomposition rule; |
|
211
|
|
|
|
|
|
|
I: the set of possible reassembly statements |
|
212
|
|
|
|
|
|
|
for that keyword and decomposition rule. |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=head2 %reasmblist_for_memory |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
This structure is identical to C<%reasmblist>, except |
|
217
|
|
|
|
|
|
|
that these rules are only invoked when a user comment |
|
218
|
|
|
|
|
|
|
is being retrieved from memory. These contain comments |
|
219
|
|
|
|
|
|
|
such as "Earlier you mentioned that...," which are only |
|
220
|
|
|
|
|
|
|
appropriate for remembered comments. Rules in the script |
|
221
|
|
|
|
|
|
|
must be specially marked in order to be included |
|
222
|
|
|
|
|
|
|
in this list rather than C<%reasmblist>. The default |
|
223
|
|
|
|
|
|
|
script only has a few of these rules. |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head2 @memory |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
A list of user comments which an Eliza instance is remembering |
|
228
|
|
|
|
|
|
|
for future use. Eliza does not remember everything, only some things. |
|
229
|
|
|
|
|
|
|
In this implementation, Eliza will only remember comments |
|
230
|
|
|
|
|
|
|
which match a decomposition rule which actually has reassembly |
|
231
|
|
|
|
|
|
|
rules that are marked with the keyword "reasm_for_memory" |
|
232
|
|
|
|
|
|
|
rather than the normal "reasmb". The default script |
|
233
|
|
|
|
|
|
|
only has a few of these. |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head2 %keyranks |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
I: the set of keywords; I: the ranks for each keyword |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=head2 @quit |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
"quit" words -- that is, words the user might use |
|
242
|
|
|
|
|
|
|
to try to exit the program. |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=head2 @initial |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
Possible greetings for the beginning of the program. |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 @final |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Possible farewells for the end of the program. |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 %pre |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
I: words which are replaced before any transformations; |
|
255
|
|
|
|
|
|
|
I: the respective replacement words. |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 %post |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
I: words which are replaced after the transformations |
|
260
|
|
|
|
|
|
|
and after the reply is constructed; I: the respective |
|
261
|
|
|
|
|
|
|
replacement words. |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head2 %synon |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
I: words which are found in decomposition rules; |
|
266
|
|
|
|
|
|
|
I: words which are treated just like their |
|
267
|
|
|
|
|
|
|
corresponding synonyms during matching of decomposition |
|
268
|
|
|
|
|
|
|
rules. |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 Other data members |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
There are several other internal data members. Hopefully |
|
273
|
|
|
|
|
|
|
these are sufficiently obvious that you can learn about them |
|
274
|
|
|
|
|
|
|
just by reading the source code. |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=cut |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
my %fields = ( |
|
280
|
|
|
|
|
|
|
name => 'Eliza', |
|
281
|
|
|
|
|
|
|
scriptfile => '', |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
debug => 0, |
|
284
|
|
|
|
|
|
|
debug_text => '', |
|
285
|
|
|
|
|
|
|
transform_text => '', |
|
286
|
|
|
|
|
|
|
prompts_on => 1, |
|
287
|
|
|
|
|
|
|
memory_on => 1, |
|
288
|
|
|
|
|
|
|
botprompt => '', |
|
289
|
|
|
|
|
|
|
userprompt => '', |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
myrand => |
|
292
|
|
|
|
|
|
|
sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); }, |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
keyranks => undef, |
|
295
|
|
|
|
|
|
|
decomplist => undef, |
|
296
|
|
|
|
|
|
|
reasmblist => undef, |
|
297
|
|
|
|
|
|
|
reasmblist_for_memory => undef, |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
pre => undef, |
|
300
|
|
|
|
|
|
|
post => undef, |
|
301
|
|
|
|
|
|
|
synon => undef, |
|
302
|
|
|
|
|
|
|
initial => undef, |
|
303
|
|
|
|
|
|
|
final => undef, |
|
304
|
|
|
|
|
|
|
quit => undef, |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
max_memory_size => 5, |
|
307
|
|
|
|
|
|
|
likelihood_of_using_memory => 1, |
|
308
|
|
|
|
|
|
|
memory => undef, |
|
309
|
|
|
|
|
|
|
); |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
#################################################################### |
|
313
|
|
|
|
|
|
|
# ---{ B E G I N M E T H O D S }---------------------------------- |
|
314
|
|
|
|
|
|
|
# |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=head1 METHODS |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head2 new() |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
my $chatterbot = new Chatbot::Eliza; |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
new() creates a new Eliza object. This method |
|
323
|
|
|
|
|
|
|
also calls the internal _initialize() method, which in turn |
|
324
|
|
|
|
|
|
|
calls the parse_script_data() method, which initializes |
|
325
|
|
|
|
|
|
|
the script data. |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
my $chatterbot = new Chatbot::Eliza 'Ahmad', 'myfile.txt'; |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
The eliza object defaults to the name "Eliza", and it |
|
330
|
|
|
|
|
|
|
contains default script data within itself. However, |
|
331
|
|
|
|
|
|
|
using the syntax above, you can specify an alternative |
|
332
|
|
|
|
|
|
|
name and an alternative script file. |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
See the method parse_script_data(). for a description |
|
335
|
|
|
|
|
|
|
of the format of the script file. |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=cut |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
sub new { |
|
340
|
1
|
|
|
1
|
1
|
189
|
my ($that,$name,$scriptfile) = @_; |
|
341
|
1
|
|
33
|
|
|
13
|
my $class = ref($that) || $that; |
|
342
|
1
|
|
|
|
|
19
|
my $self = { |
|
343
|
|
|
|
|
|
|
_permitted => \%fields, |
|
344
|
|
|
|
|
|
|
%fields, |
|
345
|
|
|
|
|
|
|
}; |
|
346
|
1
|
|
|
|
|
4
|
bless $self, $class; |
|
347
|
1
|
|
|
|
|
4
|
$self->_initialize($name,$scriptfile); |
|
348
|
1
|
|
|
|
|
2
|
return $self; |
|
349
|
|
|
|
|
|
|
} # end method new |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub _initialize { |
|
352
|
1
|
|
|
1
|
|
2
|
my ($self,$param1,$param2) = @_; |
|
353
|
|
|
|
|
|
|
|
|
354
|
1
|
50
|
33
|
|
|
7
|
if (defined $param1 and ref $param1 eq "HASH") { |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# Allow the calling program to pass in intial parameters |
|
357
|
|
|
|
|
|
|
# as an anonymous hash |
|
358
|
0
|
|
|
|
|
0
|
map { $self->{$_} = $param1->{$_}; } keys %$param1; |
|
|
0
|
|
|
|
|
0
|
|
|
359
|
|
|
|
|
|
|
|
|
360
|
0
|
|
|
|
|
0
|
$self->parse_script_data( $self->{scriptfile} ); |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
} else { |
|
363
|
1
|
50
|
|
|
|
12
|
$self->name($param1) if $param1; |
|
364
|
1
|
|
|
|
|
4
|
$self->parse_script_data($param2); |
|
365
|
|
|
|
|
|
|
} |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Initialize the memory array ref at instantiation time, |
|
368
|
|
|
|
|
|
|
# rather than at class definition time. |
|
369
|
|
|
|
|
|
|
# (THANKS to Randal Schwartz and Robert Chin for fixing this bug.) |
|
370
|
|
|
|
|
|
|
# |
|
371
|
1
|
|
|
|
|
3
|
$self->{memory} = []; |
|
372
|
|
|
|
|
|
|
} |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub AUTOLOAD { |
|
375
|
53
|
|
|
53
|
|
74
|
my $self = shift; |
|
376
|
53
|
|
33
|
|
|
112
|
my $class = ref($self) || croak "$self is not an object : $!\n"; |
|
377
|
53
|
|
|
|
|
78
|
my $field = $AUTOLOAD; |
|
378
|
53
|
|
|
|
|
158
|
$field =~ s/.*://; # Strip fully-qualified portion |
|
379
|
|
|
|
|
|
|
|
|
380
|
53
|
50
|
|
|
|
137
|
unless (exists $self->{"_permitted"}->{$field} ) { |
|
381
|
0
|
|
|
|
|
0
|
croak "Can't access `$field' field in object of class $class : $!\n"; |
|
382
|
|
|
|
|
|
|
} |
|
383
|
|
|
|
|
|
|
|
|
384
|
53
|
100
|
|
|
|
97
|
if (@_) { |
|
385
|
25
|
|
|
|
|
56
|
return $self->{$field} = shift; |
|
386
|
|
|
|
|
|
|
} else { |
|
387
|
28
|
|
|
|
|
166
|
return $self->{$field}; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
} # end method AUTOLOAD |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
#################################################################### |
|
393
|
|
|
|
|
|
|
# --- command_interface --- |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=head2 command_interface() |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
$chatterbot->command_interface; |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
command_interface() opens an interactive session with |
|
400
|
|
|
|
|
|
|
the Eliza object, just like the original Eliza program. |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
If you want to design your own session format, then |
|
403
|
|
|
|
|
|
|
you can write your own while loop and your own functions |
|
404
|
|
|
|
|
|
|
for prompting for and reading user input, and use the |
|
405
|
|
|
|
|
|
|
transform() method to generate Eliza's responses. |
|
406
|
|
|
|
|
|
|
(I: you do not need to invoke preprocess() |
|
407
|
|
|
|
|
|
|
and postprocess() directly, because these are invoked |
|
408
|
|
|
|
|
|
|
from within the transform() method.) |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
But if you're lazy and you want to skip all that, |
|
411
|
|
|
|
|
|
|
then just use command_interface(). It's all done for you. |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
During an interactive session invoked using command_interface(), |
|
414
|
|
|
|
|
|
|
you can enter the word "debug" to toggle debug mode on and off. |
|
415
|
|
|
|
|
|
|
You can also enter the keyword "memory" to invoke the _debug_memory() |
|
416
|
|
|
|
|
|
|
method and print out the contents of the Eliza instance's memory. |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=cut |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
sub command_interface { |
|
421
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
|
422
|
0
|
|
|
|
|
0
|
my ($user_input, $previous_user_input, $reply); |
|
423
|
|
|
|
|
|
|
|
|
424
|
0
|
|
|
|
|
0
|
$user_input = ""; |
|
425
|
|
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
0
|
$self->botprompt($self->name . ":\t"); # Eliza's prompt |
|
427
|
0
|
|
|
|
|
0
|
$self->userprompt("you:\t"); # User's prompt |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
# Seed the random number generator. |
|
430
|
0
|
|
|
|
|
0
|
srand( time() ^ ($$ + ($$ << 15)) ); |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# Print the Eliza prompt |
|
433
|
0
|
0
|
|
|
|
0
|
print $self->botprompt if $self->prompts_on; |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# Print an initial greeting |
|
436
|
0
|
|
|
|
|
0
|
print "$self->{initial}->[ int &{$self->{myrand}}( scalar @{ $self->{initial} } ) ]\n"; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
################################################################### |
|
440
|
|
|
|
|
|
|
# command loop. This loop should go on forever, |
|
441
|
|
|
|
|
|
|
# until we explicity break out of it. |
|
442
|
|
|
|
|
|
|
# |
|
443
|
0
|
|
|
|
|
0
|
while (1) { |
|
444
|
|
|
|
|
|
|
|
|
445
|
0
|
0
|
|
|
|
0
|
print $self->userprompt if $self->prompts_on; |
|
446
|
|
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
0
|
$previous_user_input = $user_input; |
|
448
|
0
|
|
|
|
|
0
|
chomp( $user_input = ); |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# If the user wants to quit, |
|
452
|
|
|
|
|
|
|
# print out a farewell and quit. |
|
453
|
0
|
0
|
|
|
|
0
|
if ($self->_testquit($user_input) ) { |
|
454
|
0
|
|
|
|
|
0
|
$reply = "$self->{final}->[ int &{$self->{myrand}}( scalar @{$self->{final}} ) ]"; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
455
|
0
|
0
|
|
|
|
0
|
print $self->botprompt if $self->prompts_on; |
|
456
|
0
|
|
|
|
|
0
|
print "$reply\n"; |
|
457
|
0
|
|
|
|
|
0
|
last; |
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# If the user enters the word "debug", |
|
461
|
|
|
|
|
|
|
# then toggle on/off this Eliza's debug output. |
|
462
|
0
|
0
|
|
|
|
0
|
if ($user_input eq "debug") { |
|
463
|
0
|
|
|
|
|
0
|
$self->debug( ! $self->debug ); |
|
464
|
0
|
|
|
|
|
0
|
$user_input = $previous_user_input; |
|
465
|
|
|
|
|
|
|
} |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# If the user enters the word "memory", |
|
468
|
|
|
|
|
|
|
# then use the _debug_memory method to dump out |
|
469
|
|
|
|
|
|
|
# the current contents of Eliza's memory |
|
470
|
0
|
0
|
0
|
|
|
0
|
if ($user_input eq "memory" or $user_input eq "debug memory") { |
|
471
|
0
|
|
|
|
|
0
|
print $self->_debug_memory(); |
|
472
|
0
|
|
|
|
|
0
|
redo; |
|
473
|
|
|
|
|
|
|
} |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# If the user enters the word "debug that", |
|
476
|
|
|
|
|
|
|
# then dump out the debugging of the |
|
477
|
|
|
|
|
|
|
# most recent call to transform. |
|
478
|
0
|
0
|
|
|
|
0
|
if ($user_input eq "debug that") { |
|
479
|
0
|
|
|
|
|
0
|
print $self->debug_text(); |
|
480
|
0
|
|
|
|
|
0
|
redo; |
|
481
|
|
|
|
|
|
|
} |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# Invoke the transform method |
|
484
|
|
|
|
|
|
|
# to generate a reply. |
|
485
|
0
|
|
|
|
|
0
|
$reply = $self->transform( $user_input ); |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
# Print out the debugging text if debugging is set to on. |
|
489
|
|
|
|
|
|
|
# This variable should have been set by the transform method. |
|
490
|
0
|
0
|
|
|
|
0
|
print $self->debug_text if $self->debug; |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# Print the actual reply |
|
493
|
0
|
0
|
|
|
|
0
|
print $self->botprompt if $self->prompts_on; |
|
494
|
0
|
|
|
|
|
0
|
print "$reply\n"; |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
} # End UI command loop. |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
} # End method command_interface |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
#################################################################### |
|
503
|
|
|
|
|
|
|
# --- preprocess --- |
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head2 preprocess() |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
$string = preprocess($string); |
|
508
|
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
preprocess() applies simple substitution rules to the input string. |
|
510
|
|
|
|
|
|
|
Mostly this is to catch varieties in spelling, misspellings, |
|
511
|
|
|
|
|
|
|
contractions and the like. |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
preprocess() is called from within the transform() method. |
|
514
|
|
|
|
|
|
|
It is applied to user-input text, BEFORE any processing, |
|
515
|
|
|
|
|
|
|
and before a reassebly statement has been selected. |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
It uses the array C<%pre>, which is created |
|
518
|
|
|
|
|
|
|
during the parse of the script. |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=cut |
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
sub preprocess { |
|
523
|
4
|
|
|
4
|
1
|
6
|
my ($self,$string) = @_; |
|
524
|
|
|
|
|
|
|
|
|
525
|
4
|
|
|
|
|
6
|
my ($i, @wordsout, @wordsin, $keyword); |
|
526
|
|
|
|
|
|
|
|
|
527
|
4
|
|
|
|
|
12
|
@wordsout = @wordsin = split / /, $string; |
|
528
|
|
|
|
|
|
|
|
|
529
|
4
|
|
|
|
|
13
|
WORD: for ($i = 0; $i < @wordsin; $i++) { |
|
530
|
8
|
|
|
|
|
9
|
foreach $keyword (keys %{ $self->{pre} }) { |
|
|
8
|
|
|
|
|
34
|
|
|
531
|
0
|
0
|
|
|
|
0
|
if ($wordsin[$i] =~ /\b$keyword\b/i ) { |
|
532
|
0
|
|
|
|
|
0
|
($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{pre}->{$keyword}/ig; |
|
533
|
0
|
|
|
|
|
0
|
next WORD; |
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
} |
|
536
|
|
|
|
|
|
|
} |
|
537
|
4
|
|
|
|
|
13
|
return join ' ', @wordsout; |
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
#################################################################### |
|
542
|
|
|
|
|
|
|
# --- postprocess --- |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=head2 postprocess() |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
$string = postprocess($string); |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
postprocess() applies simple substitution rules to the |
|
549
|
|
|
|
|
|
|
reassembly rule. This is where all the "I"'s and "you"'s |
|
550
|
|
|
|
|
|
|
are exchanged. postprocess() is called from within the |
|
551
|
|
|
|
|
|
|
transform() function. |
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
It uses the array C<%post>, created |
|
554
|
|
|
|
|
|
|
during the parse of the script. |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=cut |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
sub postprocess { |
|
559
|
27
|
|
|
27
|
1
|
41
|
my ($self,$string) = @_; |
|
560
|
|
|
|
|
|
|
|
|
561
|
27
|
|
|
|
|
30
|
my ($i, @wordsout, @wordsin, $keyword); |
|
562
|
|
|
|
|
|
|
|
|
563
|
27
|
|
|
|
|
50
|
@wordsin = @wordsout = split (/ /, $string); |
|
564
|
|
|
|
|
|
|
|
|
565
|
27
|
|
|
|
|
72
|
WORD: for ($i = 0; $i < @wordsin; $i++) { |
|
566
|
5
|
|
|
|
|
6
|
foreach $keyword (keys %{ $self->{post} }) { |
|
|
5
|
|
|
|
|
20
|
|
|
567
|
0
|
0
|
|
|
|
0
|
if ($wordsin[$i] =~ /\b$keyword\b/i ) { |
|
568
|
0
|
|
|
|
|
0
|
($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{post}->{$keyword}/ig; |
|
569
|
0
|
|
|
|
|
0
|
next WORD; |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
} |
|
572
|
|
|
|
|
|
|
} |
|
573
|
27
|
|
|
|
|
78
|
return join ' ', @wordsout; |
|
574
|
|
|
|
|
|
|
} |
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
#################################################################### |
|
577
|
|
|
|
|
|
|
# --- _testquit --- |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=head2 _testquit() |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
if ($self->_testquit($user_input) ) { ... } |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
_testquit() detects words like "bye" and "quit" and returns |
|
584
|
|
|
|
|
|
|
true if it finds one of them as the first word in the sentence. |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
These words are listed in the script, under the keyword "quit". |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=cut |
|
589
|
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
sub _testquit { |
|
591
|
0
|
|
|
0
|
|
0
|
my ($self,$string) = @_; |
|
592
|
|
|
|
|
|
|
|
|
593
|
0
|
|
|
|
|
0
|
my ($quitword, @wordsin); |
|
594
|
|
|
|
|
|
|
|
|
595
|
0
|
|
|
|
|
0
|
foreach $quitword (@{ $self->{quit} }) { |
|
|
0
|
|
|
|
|
0
|
|
|
596
|
0
|
0
|
|
|
|
0
|
return 1 if ($string =~ /\b$quitword\b/i ) ; |
|
597
|
|
|
|
|
|
|
} |
|
598
|
|
|
|
|
|
|
} |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
#################################################################### |
|
602
|
|
|
|
|
|
|
# --- _debug_memory --- |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=head2 _debug_memory() |
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
$self->_debug_memory() |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
_debug_memory() is a special function which returns |
|
609
|
|
|
|
|
|
|
the contents of Eliza's memory stack. |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=cut |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
sub _debug_memory { |
|
615
|
|
|
|
|
|
|
|
|
616
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
|
617
|
|
|
|
|
|
|
|
|
618
|
0
|
|
|
|
|
0
|
my $string = "\t"; |
|
619
|
0
|
|
|
|
|
0
|
$string .= $#{ $self->memory } + 1; |
|
|
0
|
|
|
|
|
0
|
|
|
620
|
0
|
|
|
|
|
0
|
$string .= " item(s) in memory stack:\n"; |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# [THANKS to Roy Stephan for helping me adjust this bit] |
|
623
|
|
|
|
|
|
|
# |
|
624
|
0
|
|
|
|
|
0
|
foreach (@{ $self->memory } ) { |
|
|
0
|
|
|
|
|
0
|
|
|
625
|
|
|
|
|
|
|
|
|
626
|
0
|
|
|
|
|
0
|
my $line = $_; |
|
627
|
0
|
|
|
|
|
0
|
$string .= sprintf "\t\t->$line\n" ; |
|
628
|
|
|
|
|
|
|
}; |
|
629
|
|
|
|
|
|
|
|
|
630
|
0
|
|
|
|
|
0
|
return $string; |
|
631
|
|
|
|
|
|
|
} |
|
632
|
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#################################################################### |
|
634
|
|
|
|
|
|
|
# --- transform --- |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=head2 transform() |
|
637
|
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
$reply = $chatterbot->transform( $string, $use_memory ); |
|
639
|
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
transform() applies transformation rules to the user input |
|
641
|
|
|
|
|
|
|
string. It invokes preprocess(), does transformations, |
|
642
|
|
|
|
|
|
|
then invokes postprocess(). It returns the tranformed |
|
643
|
|
|
|
|
|
|
output string, called C<$reasmb>. |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
The algorithm embedded in the transform() method has three main parts: |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
=over |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=item 1 |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
Search the input string for a keyword. |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=item 2 |
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
If we find a keyword, use the list of decomposition rules |
|
656
|
|
|
|
|
|
|
for that keyword, and pattern-match the input string against |
|
657
|
|
|
|
|
|
|
each rule. |
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=item 3 |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
If the input string matches any of the decomposition rules, |
|
662
|
|
|
|
|
|
|
then randomly select one of the reassembly rules for that |
|
663
|
|
|
|
|
|
|
decomposition rule, and use it to construct the reply. |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=back |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
transform() takes two parameters. The first is the string we want |
|
668
|
|
|
|
|
|
|
to transform. The second is a flag which indicates where this sting |
|
669
|
|
|
|
|
|
|
came from. If the flag is set, then the string has been pulled |
|
670
|
|
|
|
|
|
|
from memory, and we should use reassembly rules appropriate |
|
671
|
|
|
|
|
|
|
for that. If the flag is not set, then the string is the most |
|
672
|
|
|
|
|
|
|
recent user input, and we can use the ordinary reassembly rules. |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
The memory flag is only set when the transform() function is called |
|
675
|
|
|
|
|
|
|
recursively. The mechanism for setting this parameter is |
|
676
|
|
|
|
|
|
|
embedded in the transoform method itself. If the flag is set |
|
677
|
|
|
|
|
|
|
inappropriately, it is ignored. |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=cut |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
sub transform{ |
|
682
|
4
|
|
|
4
|
1
|
906
|
my ($self,$string,$use_memory) = @_; |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# Initialize the debugging text buffer. |
|
685
|
4
|
|
|
|
|
19
|
$self->debug_text(''); |
|
686
|
|
|
|
|
|
|
|
|
687
|
4
|
50
|
|
|
|
10
|
$self->debug_text(sprintf "\t[Pulling string \"$string\" from memory.]\n") |
|
688
|
|
|
|
|
|
|
if $use_memory; |
|
689
|
|
|
|
|
|
|
|
|
690
|
4
|
|
|
|
|
5
|
my ($i, @string_parts, $string_part, $rank, $goto, $reasmb, $keyword, |
|
691
|
|
|
|
|
|
|
$decomp, $this_decomp, $reasmbkey, @these_reasmbs, |
|
692
|
|
|
|
|
|
|
@decomp_matches, $synonyms, $synonym_index); |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# Default to a really low rank. |
|
695
|
4
|
|
|
|
|
5
|
$rank = -2; |
|
696
|
4
|
|
|
|
|
4
|
$reasmb = ""; |
|
697
|
4
|
|
|
|
|
5
|
$goto = ""; |
|
698
|
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# First run the string through the preprocessor. |
|
700
|
4
|
|
|
|
|
10
|
$string = $self->preprocess( $string ); |
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
# Convert punctuation to periods. We will assume that commas |
|
703
|
|
|
|
|
|
|
# and certain conjunctions separate distinct thoughts/sentences. |
|
704
|
4
|
|
|
|
|
8
|
$string =~ s/[?!,]/./g; |
|
705
|
4
|
|
|
|
|
7
|
$string =~ s/but/./g; # Yikes! This is English-specific. |
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
# Split the string by periods into an array |
|
708
|
4
|
|
|
|
|
8
|
@string_parts = split /\./, $string ; |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
# Examine each part of the input string in turn. |
|
711
|
4
|
|
|
|
|
7
|
STRING_PARTS: foreach $string_part (@string_parts) { |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# Run through the whole list of keywords. |
|
714
|
4
|
|
|
|
|
5
|
KEYWORD: foreach $keyword (keys %{ $self->{decomplist} }) { |
|
|
4
|
|
|
|
|
10
|
|
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# Check to see if the input string contains a keyword |
|
717
|
|
|
|
|
|
|
# which outranks any we have found previously |
|
718
|
|
|
|
|
|
|
# (On first loop, rank is set to -2.) |
|
719
|
12
|
100
|
66
|
|
|
181
|
if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto) |
|
|
|
|
66
|
|
|
|
|
|
720
|
|
|
|
|
|
|
and |
|
721
|
|
|
|
|
|
|
$rank < $self->{keyranks}->{$keyword} |
|
722
|
|
|
|
|
|
|
) |
|
723
|
|
|
|
|
|
|
{ |
|
724
|
|
|
|
|
|
|
# If we find one, then set $rank to equal |
|
725
|
|
|
|
|
|
|
# the rank of that keyword. |
|
726
|
3
|
|
|
|
|
7
|
$rank = $self->{keyranks}->{$keyword}; |
|
727
|
|
|
|
|
|
|
|
|
728
|
3
|
|
|
|
|
14
|
$self->debug_text($self->debug_text . sprintf "\t$rank> $keyword"); |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
# Now let's check all the decomposition rules for that keyword. |
|
731
|
3
|
|
|
|
|
6
|
DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} }) { |
|
|
3
|
|
|
|
|
7
|
|
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
# Change '*' to '\b(.*)\b' in this decomposition rule, |
|
734
|
|
|
|
|
|
|
# so we can use it for regular expressions. Later, |
|
735
|
|
|
|
|
|
|
# we will want to isolate individual matches to each wildcard. |
|
736
|
3
|
|
|
|
|
14
|
($this_decomp = $decomp) =~ s/\s*\*\s*/\\b\(\.\*\)\\b/g; |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# If this docomposition rule contains a word which begins with '@', |
|
739
|
|
|
|
|
|
|
# then the script also contained some synonyms for that word. |
|
740
|
|
|
|
|
|
|
# Find them all using %synon and generate a regular expression |
|
741
|
|
|
|
|
|
|
# containing all of them. |
|
742
|
3
|
50
|
|
|
|
14
|
if ($this_decomp =~ /\@/ ) { |
|
743
|
0
|
|
|
|
|
0
|
($synonym_index = $this_decomp) =~ s/.*\@(\w*).*/$1/i ; |
|
744
|
0
|
|
|
|
|
0
|
$synonyms = join ('|', @{ $self->{synon}->{$synonym_index} }); |
|
|
0
|
|
|
|
|
0
|
|
|
745
|
0
|
|
|
|
|
0
|
$this_decomp =~ s/(.*)\@$synonym_index(.*)/$1($synonym_index\|$synonyms)$2/g; |
|
746
|
|
|
|
|
|
|
} |
|
747
|
|
|
|
|
|
|
|
|
748
|
3
|
|
|
|
|
12
|
$self->debug_text($self->debug_text . sprintf "\n\t\t: $decomp"); |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# Using the regular expression we just generated, |
|
751
|
|
|
|
|
|
|
# match against the input string. Use empty "()"'s to |
|
752
|
|
|
|
|
|
|
# eliminate warnings about uninitialized variables. |
|
753
|
3
|
50
|
|
|
|
57
|
if ($string_part =~ /$this_decomp()()()()()()()()()()/i) { |
|
754
|
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
# If this decomp rule matched the string, |
|
756
|
|
|
|
|
|
|
# then create an array, so that we can refer to matches |
|
757
|
|
|
|
|
|
|
# to individual wildcards. Use '0' as a placeholder |
|
758
|
|
|
|
|
|
|
# (we don't want to refer to any "zeroth" wildcard). |
|
759
|
3
|
|
|
|
|
25
|
@decomp_matches = ("0", $1, $2, $3, $4, $5, $6, $7, $8, $9); |
|
760
|
3
|
|
|
|
|
13
|
$self->debug_text($self->debug_text . sprintf " : @decomp_matches\n"); |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
# Using the keyword and the decomposition rule, |
|
763
|
|
|
|
|
|
|
# reconstruct a key for the list of reassamble rules. |
|
764
|
3
|
|
|
|
|
7
|
$reasmbkey = join ($;,$keyword,$decomp); |
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
# Get the list of possible reassembly rules for this key. |
|
767
|
|
|
|
|
|
|
# |
|
768
|
3
|
50
|
33
|
|
|
11
|
if (defined $use_memory and $#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0) { |
|
|
0
|
|
|
|
|
0
|
|
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
# If this transform function was invoked with the memory flag, |
|
771
|
|
|
|
|
|
|
# and there are in fact reassembly rules which are appropriate |
|
772
|
|
|
|
|
|
|
# for pulling out of memory, then include them. |
|
773
|
0
|
|
|
|
|
0
|
@these_reasmbs = @{ $self->{reasmblist_for_memory}->{$reasmbkey} } |
|
|
0
|
|
|
|
|
0
|
|
|
774
|
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
} else { |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
# Otherwise, just use the plain reassembly rules. |
|
778
|
|
|
|
|
|
|
# (This is what normally happens.) |
|
779
|
3
|
|
|
|
|
4
|
@these_reasmbs = @{ $self->{reasmblist}->{$reasmbkey} } |
|
|
3
|
|
|
|
|
9
|
|
|
780
|
|
|
|
|
|
|
} |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
# Pick out a reassembly rule at random. |
|
783
|
3
|
|
|
|
|
5
|
$reasmb = $these_reasmbs[ int &{$self->{myrand}}( scalar @these_reasmbs ) ]; |
|
|
3
|
|
|
|
|
7
|
|
|
784
|
|
|
|
|
|
|
|
|
785
|
3
|
|
|
|
|
14
|
$self->debug_text($self->debug_text . sprintf "\t\t--> $reasmb\n"); |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
# If the reassembly rule we picked contains the word "goto", |
|
788
|
|
|
|
|
|
|
# then we start over with a new keyword. Set $keyword to equal |
|
789
|
|
|
|
|
|
|
# that word, and start the whole loop over. |
|
790
|
3
|
50
|
|
|
|
10
|
if ($reasmb =~ m/^goto\s(\w*).*/i) { |
|
791
|
0
|
|
|
|
|
0
|
$self->debug_text($self->debug_text . sprintf "\$1 = $1\n"); |
|
792
|
0
|
|
|
|
|
0
|
$goto = $keyword = $1; |
|
793
|
0
|
|
|
|
|
0
|
$rank = -2; |
|
794
|
0
|
|
|
|
|
0
|
redo KEYWORD; |
|
795
|
|
|
|
|
|
|
} |
|
796
|
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
# Otherwise, using the matches to wildcards which we stored above, |
|
798
|
|
|
|
|
|
|
# insert words from the input string back into the reassembly rule. |
|
799
|
|
|
|
|
|
|
# [THANKS to Gidon Wise for submitting a bugfix here] |
|
800
|
3
|
|
|
|
|
9
|
for ($i=1; $i <= $#decomp_matches; $i++) { |
|
801
|
27
|
|
|
|
|
72
|
$decomp_matches[$i] = $self->postprocess( $decomp_matches[$i] ); |
|
802
|
27
|
|
|
|
|
91
|
$decomp_matches[$i] =~ s/([,;?!]|\.*)$//; |
|
803
|
27
|
|
|
|
|
269
|
$reasmb =~ s/\($i\)/$decomp_matches[$i]/g; |
|
804
|
|
|
|
|
|
|
} |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
# Move on to the next keyword. If no other keywords match, |
|
807
|
|
|
|
|
|
|
# then we'll end up actually using the $reasmb string |
|
808
|
|
|
|
|
|
|
# we just generated above. |
|
809
|
3
|
|
|
|
|
11
|
next KEYWORD ; |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
} # End if ($string_part =~ /$this_decomp/i) |
|
812
|
|
|
|
|
|
|
|
|
813
|
0
|
|
|
|
|
0
|
$self->debug_text($self->debug_text . sprintf "\n"); |
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
} # End DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} }) |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
} # End if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto) |
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
} # End KEYWORD: foreach $keyword (keys %{ $self->{decomplist}) |
|
820
|
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
} # End STRING_PARTS: foreach $string_part (@string_parts) { |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
=head2 How memory is used |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
In the script, some reassembly rules are special. They are marked with |
|
826
|
|
|
|
|
|
|
the keyword "reasm_for_memory", rather than just "reasm". |
|
827
|
|
|
|
|
|
|
Eliza "remembers" any comment when it matches a docomposition rule |
|
828
|
|
|
|
|
|
|
for which there are any reassembly rules for memory. |
|
829
|
|
|
|
|
|
|
An Eliza object remembers up to C<$max_memory_size> (default: 5) |
|
830
|
|
|
|
|
|
|
user input strings. |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
If, during a subsequent run, the transform() method fails to find any |
|
833
|
|
|
|
|
|
|
appropriate decomposition rule for a user's comment, and if there are |
|
834
|
|
|
|
|
|
|
any comments inside the memory array, then Eliza may elect to ignore |
|
835
|
|
|
|
|
|
|
the most recent comment and instead pull out one of the strings from memory. |
|
836
|
|
|
|
|
|
|
In this case, the transform method is called recursively with the memory flag. |
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
Honestly, I am not sure exactly how this memory functionality |
|
839
|
|
|
|
|
|
|
was implemented in the original Eliza program. Hopefully |
|
840
|
|
|
|
|
|
|
this implementation is not too far from Weizenbaum's. |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
If you don't want to use the memory functionality at all, |
|
843
|
|
|
|
|
|
|
then you can disable it: |
|
844
|
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
$mybot->memory_on(0); |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
You can also achieve the same effect by making sure |
|
848
|
|
|
|
|
|
|
that the script data does not contain any reassembly rules |
|
849
|
|
|
|
|
|
|
marked with the keyword "reasm_for_memory". The default |
|
850
|
|
|
|
|
|
|
script data only has 4 such items. |
|
851
|
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=cut |
|
853
|
|
|
|
|
|
|
|
|
854
|
4
|
100
|
|
|
|
21
|
if ($reasmb eq "") { |
|
|
|
50
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
# If all else fails, call this method recursively |
|
857
|
|
|
|
|
|
|
# and make sure that it has something to parse. |
|
858
|
|
|
|
|
|
|
# Use a string from memory if anything is available. |
|
859
|
|
|
|
|
|
|
# |
|
860
|
|
|
|
|
|
|
# $self-likelihood_of_using_memory should be some number |
|
861
|
|
|
|
|
|
|
# between 1 and 0; it defaults to 1. |
|
862
|
|
|
|
|
|
|
# |
|
863
|
1
|
50
|
33
|
|
|
2
|
if ( |
|
864
|
1
|
|
|
|
|
5
|
$#{ $self->memory } >= 0 |
|
865
|
|
|
|
|
|
|
and |
|
866
|
0
|
|
|
|
|
0
|
&{$self->{myrand}}(1) >= 1 - $self->likelihood_of_using_memory |
|
867
|
|
|
|
|
|
|
) { |
|
868
|
|
|
|
|
|
|
|
|
869
|
0
|
|
|
|
|
0
|
$reasmb = $self->transform( shift @{ $self->memory }, "use memory" ); |
|
|
0
|
|
|
|
|
0
|
|
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
} else { |
|
872
|
1
|
|
|
|
|
8
|
$reasmb = $self->transform("xnone"); |
|
873
|
|
|
|
|
|
|
} |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
} elsif ($self->memory_on) { |
|
876
|
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
# If memory is switched on, then we handle memory. |
|
878
|
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
# Now that we have successfully transformed this string, |
|
880
|
|
|
|
|
|
|
# push it onto the end of the memory stack... unless, of course, |
|
881
|
|
|
|
|
|
|
# that's where we got it from in the first place, or if the rank |
|
882
|
|
|
|
|
|
|
# is not the kind we remember. |
|
883
|
|
|
|
|
|
|
# |
|
884
|
3
|
50
|
33
|
|
|
4
|
if ( |
|
885
|
3
|
|
|
|
|
14
|
$#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0 |
|
886
|
|
|
|
|
|
|
and |
|
887
|
|
|
|
|
|
|
not defined $use_memory |
|
888
|
|
|
|
|
|
|
) { |
|
889
|
|
|
|
|
|
|
|
|
890
|
0
|
|
|
|
|
0
|
push @{ $self->memory },$string ; |
|
|
0
|
|
|
|
|
0
|
|
|
891
|
|
|
|
|
|
|
} |
|
892
|
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
# Shift out the least-recent item from the bottom |
|
894
|
|
|
|
|
|
|
# of the memory stack if the stack exceeds the max size. |
|
895
|
3
|
50
|
|
|
|
4
|
shift @{ $self->memory } if $#{ $self->memory } >= $self->max_memory_size; |
|
|
0
|
|
|
|
|
0
|
|
|
|
3
|
|
|
|
|
20
|
|
|
896
|
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
$self->debug_text($self->debug_text |
|
898
|
3
|
|
|
|
|
18
|
. sprintf("\t%d item(s) in memory.\n", $#{ $self->memory } + 1 ) ) ; |
|
|
3
|
|
|
|
|
17
|
|
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
} # End if ($reasmb eq "") |
|
901
|
|
|
|
|
|
|
|
|
902
|
4
|
|
|
|
|
10
|
$reasmb =~ tr/ / /s; # Eliminate any duplicate space characters. |
|
903
|
4
|
|
|
|
|
8
|
$reasmb =~ s/[ ][?]$/?/; # Eliminate any spaces before the question mark. |
|
904
|
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
# Save the return string so that forgetful calling programs |
|
906
|
|
|
|
|
|
|
# can ask the bot what the last reply was. |
|
907
|
4
|
|
|
|
|
17
|
$self->transform_text($reasmb); |
|
908
|
|
|
|
|
|
|
|
|
909
|
4
|
|
|
|
|
17
|
return $reasmb ; |
|
910
|
|
|
|
|
|
|
} |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
#################################################################### |
|
914
|
|
|
|
|
|
|
# --- parse_script_data --- |
|
915
|
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
=head2 parse_script_data() |
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
$self->parse_script_data; |
|
919
|
|
|
|
|
|
|
$self->parse_script_data( $script_file ); |
|
920
|
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
parse_script_data() is invoked from the _initialize() method, |
|
922
|
|
|
|
|
|
|
which is called from the new() function. However, you can also |
|
923
|
|
|
|
|
|
|
call this method at any time against an already-instantiated |
|
924
|
|
|
|
|
|
|
Eliza instance. In that case, the new script data is I |
|
925
|
|
|
|
|
|
|
to the old script data. The old script data is not deleted. |
|
926
|
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
You can pass a parameter to this function, which is the name of the |
|
928
|
|
|
|
|
|
|
script file, and it will read in and parse that file. |
|
929
|
|
|
|
|
|
|
If you do not pass any parameter to this method, then |
|
930
|
|
|
|
|
|
|
it will read the data embedded at the end of the module as its |
|
931
|
|
|
|
|
|
|
default script data. |
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
If you pass the name of a script file to parse_script_data(), |
|
934
|
|
|
|
|
|
|
and that file is not available for reading, then the module dies. |
|
935
|
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
=head1 Format of the script file |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
This module includes a default script file within itself, |
|
940
|
|
|
|
|
|
|
so it is not necessary to explicitly specify a script file |
|
941
|
|
|
|
|
|
|
when instantiating an Eliza object. |
|
942
|
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
Each line in the script file can specify a key, |
|
944
|
|
|
|
|
|
|
a decomposition rule, or a reassembly rule. |
|
945
|
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
key: remember 5 |
|
947
|
|
|
|
|
|
|
decomp: * i remember * |
|
948
|
|
|
|
|
|
|
reasmb: Do you often think of (2) ? |
|
949
|
|
|
|
|
|
|
reasmb: Does thinking of (2) bring anything else to mind ? |
|
950
|
|
|
|
|
|
|
decomp: * do you remember * |
|
951
|
|
|
|
|
|
|
reasmb: Did you think I would forget (2) ? |
|
952
|
|
|
|
|
|
|
reasmb: What about (2) ? |
|
953
|
|
|
|
|
|
|
reasmb: goto what |
|
954
|
|
|
|
|
|
|
pre: equivalent alike |
|
955
|
|
|
|
|
|
|
synon: belief feel think believe wish |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
The number after the key specifies the rank. |
|
958
|
|
|
|
|
|
|
If a user's input contains the keyword, then |
|
959
|
|
|
|
|
|
|
the transform() function will try to match |
|
960
|
|
|
|
|
|
|
one of the decomposition rules for that keyword. |
|
961
|
|
|
|
|
|
|
If one matches, then it will select one of |
|
962
|
|
|
|
|
|
|
the reassembly rules at random. The number |
|
963
|
|
|
|
|
|
|
(2) here means "use whatever set of words |
|
964
|
|
|
|
|
|
|
matched the second asterisk in the decomposition |
|
965
|
|
|
|
|
|
|
rule." |
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
If you specify a list of synonyms for a word, |
|
968
|
|
|
|
|
|
|
the you should use a "@" when you use that |
|
969
|
|
|
|
|
|
|
word in a decomposition rule: |
|
970
|
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
decomp: * i @belief i * |
|
972
|
|
|
|
|
|
|
reasmb: Do you really think so ? |
|
973
|
|
|
|
|
|
|
reasmb: But you are not sure you (3). |
|
974
|
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
Otherwise, the script will never check to see |
|
976
|
|
|
|
|
|
|
if there are any synonyms for that keyword. |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
Reassembly rules should be marked with I |
|
979
|
|
|
|
|
|
|
rather than I when it is appropriate for use |
|
980
|
|
|
|
|
|
|
when a user's comment has been extracted from memory. |
|
981
|
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
key: my 2 |
|
983
|
|
|
|
|
|
|
decomp: * my * |
|
984
|
|
|
|
|
|
|
reasm_for_memory: Let's discuss further why your (2). |
|
985
|
|
|
|
|
|
|
reasm_for_memory: Earlier you said your (2). |
|
986
|
|
|
|
|
|
|
reasm_for_memory: But your (2). |
|
987
|
|
|
|
|
|
|
reasm_for_memory: Does that have anything to do with the fact that your (2) ? |
|
988
|
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=head1 How the script file is parsed |
|
990
|
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
Each line in the script file contains an "entrytype" |
|
992
|
|
|
|
|
|
|
(key, decomp, synon) and an "entry", separated by |
|
993
|
|
|
|
|
|
|
a colon. In turn, each "entry" can itself be |
|
994
|
|
|
|
|
|
|
composed of a "key" and a "value", separated by |
|
995
|
|
|
|
|
|
|
a space. The parse_script_data() function |
|
996
|
|
|
|
|
|
|
parses each line out, and splits the "entry" and |
|
997
|
|
|
|
|
|
|
"entrytype" portion of each line into two variables, |
|
998
|
|
|
|
|
|
|
C<$entry> and C<$entrytype>. |
|
999
|
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
Next, it uses the string C<$entrytype> to determine |
|
1001
|
|
|
|
|
|
|
what sort of stuff to expect in the C<$entry> variable, |
|
1002
|
|
|
|
|
|
|
if anything, and parses it accordingly. In some cases, |
|
1003
|
|
|
|
|
|
|
there is no second level of key-value pair, so the function |
|
1004
|
|
|
|
|
|
|
does not even bother to isolate or create C<$key> and C<$value>. |
|
1005
|
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
C<$key> is always a single word. C<$value> can be null, |
|
1007
|
|
|
|
|
|
|
or one single word, or a string composed of several words, |
|
1008
|
|
|
|
|
|
|
or an array of words. |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
Based on all these entries and keys and values, |
|
1011
|
|
|
|
|
|
|
the function creates two giant hashes: |
|
1012
|
|
|
|
|
|
|
C<%decomplist>, which holds the decomposition rules for |
|
1013
|
|
|
|
|
|
|
each keyword, and C<%reasmblist>, which holds the |
|
1014
|
|
|
|
|
|
|
reassembly phrases for each decomposition rule. |
|
1015
|
|
|
|
|
|
|
It also creates C<%keyranks>, which holds the ranks for |
|
1016
|
|
|
|
|
|
|
each key. |
|
1017
|
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
Six other arrays are created: C<%reasm_for_memory, %pre, %post, |
|
1019
|
|
|
|
|
|
|
%synon, @initial,> and C<@final>. |
|
1020
|
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
=cut |
|
1022
|
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
sub parse_script_data { |
|
1024
|
|
|
|
|
|
|
|
|
1025
|
1
|
|
|
1
|
1
|
2
|
my ($self,$scriptfile) = @_; |
|
1026
|
1
|
|
|
|
|
2
|
my @scriptlines; |
|
1027
|
|
|
|
|
|
|
|
|
1028
|
1
|
50
|
|
|
|
3
|
if ($scriptfile) { |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
# If we have an external script file, open it |
|
1031
|
|
|
|
|
|
|
# and read it in (the whole thing, all at once). |
|
1032
|
1
|
50
|
|
|
|
35
|
open (SCRIPTFILE, "<$scriptfile") |
|
1033
|
|
|
|
|
|
|
or die "Could not read from file $scriptfile : $!\n"; |
|
1034
|
1
|
|
|
|
|
27
|
@scriptlines = ; # read in script data |
|
1035
|
1
|
|
|
|
|
8
|
$self->scriptfile($scriptfile); |
|
1036
|
1
|
|
|
|
|
7
|
close (SCRIPTFILE); |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
} else { |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
# Otherwise, read in the data from the bottom |
|
1041
|
|
|
|
|
|
|
# of this file. This data might be read several |
|
1042
|
|
|
|
|
|
|
# times, so we save the offset pointer and |
|
1043
|
|
|
|
|
|
|
# reset it when we're done. |
|
1044
|
0
|
|
|
|
|
0
|
my $where= tell(DATA); |
|
1045
|
0
|
|
|
|
|
0
|
@scriptlines = ; # read in script data |
|
1046
|
0
|
|
|
|
|
0
|
seek(DATA, $where, 0); |
|
1047
|
0
|
|
|
|
|
0
|
$self->scriptfile(''); |
|
1048
|
|
|
|
|
|
|
} |
|
1049
|
|
|
|
|
|
|
|
|
1050
|
1
|
|
|
|
|
3
|
my ($entrytype, $entry, $key, $value) ; |
|
1051
|
1
|
|
|
|
|
2
|
my $thiskey = ""; |
|
1052
|
1
|
|
|
|
|
2
|
my $thisdecomp = ""; |
|
1053
|
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
############################################################ |
|
1055
|
|
|
|
|
|
|
# Examine each line of script data. |
|
1056
|
1
|
|
|
|
|
3
|
for (@scriptlines) { |
|
1057
|
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
# Skip comments and lines with only whitespace. |
|
1059
|
9
|
50
|
33
|
|
|
53
|
next if (/^\s*#/ || /^\s*$/); |
|
1060
|
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
# Split entrytype and entry, using a colon as the delimiter. |
|
1062
|
9
|
|
|
|
|
43
|
($entrytype, $entry) = $_ =~ m/^\s*(\S*)\s*:\s*(.*)\s*$/; |
|
1063
|
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
# Case loop, based on the entrytype. |
|
1065
|
9
|
|
|
|
|
18
|
for ($entrytype) { |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
9
|
50
|
|
|
|
21
|
/quit/ and do { push @{ $self->{quit} }, $entry; last; }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1068
|
9
|
50
|
|
|
|
18
|
/initial/ and do { push @{ $self->{initial} }, $entry; last; }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1069
|
9
|
50
|
|
|
|
18
|
/final/ and do { push @{ $self->{final} }, $entry; last; }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1070
|
|
|
|
|
|
|
|
|
1071
|
9
|
100
|
|
|
|
18
|
/decomp/ and do { |
|
1072
|
3
|
50
|
|
|
|
10
|
die "$0: error parsing script: decomposition rule with no keyword.\n" |
|
1073
|
|
|
|
|
|
|
if $thiskey eq ""; |
|
1074
|
3
|
|
|
|
|
7
|
$thisdecomp = join($;,$thiskey,$entry); |
|
1075
|
3
|
|
|
|
|
4
|
push @{ $self->{decomplist}->{$thiskey} }, $entry ; |
|
|
3
|
|
|
|
|
10
|
|
|
1076
|
3
|
|
|
|
|
5
|
last; |
|
1077
|
|
|
|
|
|
|
}; |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
6
|
100
|
|
|
|
15
|
/reasmb/ and do { |
|
1080
|
3
|
50
|
|
|
|
8
|
die "$0: error parsing script: reassembly rule with no decomposition rule.\n" |
|
1081
|
|
|
|
|
|
|
if $thisdecomp eq ""; |
|
1082
|
3
|
|
|
|
|
4
|
push @{ $self->{reasmblist}->{$thisdecomp} }, $entry ; |
|
|
3
|
|
|
|
|
10
|
|
|
1083
|
3
|
|
|
|
|
8
|
last; |
|
1084
|
|
|
|
|
|
|
}; |
|
1085
|
|
|
|
|
|
|
|
|
1086
|
3
|
50
|
|
|
|
7
|
/reasm_for_memory/ and do { |
|
1087
|
0
|
0
|
|
|
|
0
|
die "$0: error parsing script: reassembly rule with no decomposition rule.\n" |
|
1088
|
|
|
|
|
|
|
if $thisdecomp eq ""; |
|
1089
|
0
|
|
|
|
|
0
|
push @{ $self->{reasmblist_for_memory}->{$thisdecomp} }, $entry ; |
|
|
0
|
|
|
|
|
0
|
|
|
1090
|
0
|
|
|
|
|
0
|
last; |
|
1091
|
|
|
|
|
|
|
}; |
|
1092
|
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
# The entrytypes below actually expect to see a key and value |
|
1094
|
|
|
|
|
|
|
# pair in the entry, so we split them out. The first word, |
|
1095
|
|
|
|
|
|
|
# separated by a space, is the key, and everything else is |
|
1096
|
|
|
|
|
|
|
# an array of values. |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
3
|
|
|
|
|
10
|
($key,$value) = $entry =~ m/^\s*(\S*)\s*(.*)/; |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
3
|
50
|
|
|
|
10
|
/pre/ and do { $self->{pre}->{$key} = $value; last; }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1101
|
3
|
50
|
|
|
|
6
|
/post/ and do { $self->{post}->{$key} = $value; last; }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1102
|
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
# synon expects an array, so we split $value into an array, using " " as delimiter. |
|
1104
|
3
|
50
|
|
|
|
7
|
/synon/ and do { $self->{synon}->{$key} = [ split /\ /, $value ]; last; }; |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
1105
|
|
|
|
|
|
|
|
|
1106
|
3
|
50
|
|
|
|
8
|
/key/ and do { |
|
1107
|
3
|
|
|
|
|
5
|
$thiskey = $key; |
|
1108
|
3
|
|
|
|
|
5
|
$thisdecomp = ""; |
|
1109
|
3
|
|
|
|
|
16
|
$self->{keyranks}->{$thiskey} = $value ; |
|
1110
|
3
|
|
|
|
|
22
|
last; |
|
1111
|
|
|
|
|
|
|
}; |
|
1112
|
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
} # End for ($entrytype) (case loop) |
|
1114
|
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
} # End for (@scriptlines) |
|
1116
|
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
} # End of method parse_script_data |
|
1118
|
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
# Eliminate some pesky warnings. |
|
1121
|
|
|
|
|
|
|
# |
|
1122
|
|
|
|
0
|
|
|
sub DESTROY {} |
|
1123
|
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
# ---{ E N D M E T H O D S }---------------------------------- |
|
1126
|
|
|
|
|
|
|
#################################################################### |
|
1127
|
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
1; # Return a true value. |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
1132
|
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
This software is copyright (c) 2003 by John Nolan Ejpnolan@sonic.netE. |
|
1134
|
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
1136
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
1137
|
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
John Nolan jpnolan@sonic.net January 2003. |
|
1141
|
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
Implements the classic Eliza algorithm by Prof. Joseph Weizenbaum. |
|
1143
|
|
|
|
|
|
|
Script format devised by Charles Hayden. |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
=cut |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
#################################################################### |
|
1150
|
|
|
|
|
|
|
# ---{ B E G I N D E F A U L T S C R I P T D A T A }---------- |
|
1151
|
|
|
|
|
|
|
# |
|
1152
|
|
|
|
|
|
|
# This script was prepared by Chris Hayden. Hayden's Eliza |
|
1153
|
|
|
|
|
|
|
# program was written in Java, however, it attempted to match |
|
1154
|
|
|
|
|
|
|
# the functionality of Weizenbaum's original program as closely |
|
1155
|
|
|
|
|
|
|
# as possible. |
|
1156
|
|
|
|
|
|
|
# |
|
1157
|
|
|
|
|
|
|
# Hayden's script format was quite different from Weizenbaum's, |
|
1158
|
|
|
|
|
|
|
# but it maintained the same content. I have adapted Hayden's |
|
1159
|
|
|
|
|
|
|
# script format, since it was simple and convenient enough |
|
1160
|
|
|
|
|
|
|
# for my purposes. |
|
1161
|
|
|
|
|
|
|
# |
|
1162
|
|
|
|
|
|
|
# I've made small modifications here and there. |
|
1163
|
|
|
|
|
|
|
# |
|
1164
|
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
# We use the token __DATA__ rather than __END__, |
|
1166
|
|
|
|
|
|
|
# so that all this data is visible within the current package. |
|
1167
|
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
__DATA__ |