| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Games::Rezrov::Speech; |
|
2
|
|
|
|
|
|
|
# ZIO functions for speech synthesis (output) and recognition (input) |
|
3
|
|
|
|
|
|
|
# mne feb. 2004 |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# HACK: this is just a subset of ZIO functions rather than a complete |
|
6
|
|
|
|
|
|
|
# module. I did it this way just to isolate the speech clutter from |
|
7
|
|
|
|
|
|
|
# ZIO_Generic. |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# will probably have to change radically as new speech APIs added |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# ADD: |
|
12
|
|
|
|
|
|
|
# - error message method explaining problem if enable calls fail |
|
13
|
|
|
|
|
|
|
# - event-based recognition model if ZIO can handle it: allow |
|
14
|
|
|
|
|
|
|
# user also to type commands if they want (Tk interface could handle this, |
|
15
|
|
|
|
|
|
|
# but a ZIO using say '$line = ' couldn't due to hang) |
|
16
|
|
|
|
|
|
|
|
|
17
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
30
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
4
|
use Games::Rezrov::ZIO_Tools; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
55
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
1
|
|
|
|
|
9
|
use Games::Rezrov::MethodMaker qw( |
|
22
|
|
|
|
|
|
|
speaking |
|
23
|
|
|
|
|
|
|
listening |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
voicetext |
|
26
|
|
|
|
|
|
|
voice_dictation |
|
27
|
|
|
|
|
|
|
recognized_phrases |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
recog_dictionary_setup |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
speech_synthesis_error |
|
32
|
|
|
|
|
|
|
speech_recognition_error |
|
33
|
1
|
|
|
1
|
|
6
|
); |
|
|
1
|
|
|
|
|
2
|
|
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# constants copied from Microsoft Speech SDK\Include\speech.h |
|
36
|
|
|
|
|
|
|
# ...are they accessible somewhere "officially" in Perl API? Dunno. |
|
37
|
1
|
|
|
1
|
|
4
|
use constant VSRMODE_OFF => 0x00000002; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
45
|
|
|
38
|
1
|
|
|
1
|
|
5
|
use constant VSRMODE_DISABLED => 0x00000001; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
31
|
|
|
39
|
1
|
|
|
1
|
|
4
|
use constant VSRMODE_CMDPAUSED => 0x00000004; |
|
|
1
|
|
|
|
|
11
|
|
|
|
1
|
|
|
|
|
36
|
|
|
40
|
1
|
|
|
1
|
|
5
|
use constant VSRMODE_CMDONLY => 0x00000010; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
33
|
|
|
41
|
1
|
|
|
1
|
|
5
|
use constant VSRMODE_DCTONLY => 0x00000020; |
|
|
1
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
37
|
|
|
42
|
1
|
|
|
1
|
|
4
|
use constant VSRMODE_CMDANDDCT => 0x00000040; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1687
|
|
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub init_speech_synthesis { |
|
45
|
|
|
|
|
|
|
# |
|
46
|
|
|
|
|
|
|
# try to enable speech output, return success or failure |
|
47
|
|
|
|
|
|
|
# |
|
48
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
|
49
|
0
|
|
|
|
|
|
my $result = 0; |
|
50
|
0
|
0
|
|
|
|
|
if (find_module('Win32::SAPI4')) { |
|
51
|
0
|
|
|
|
|
|
require Win32::SAPI4; |
|
52
|
0
|
|
|
|
|
|
require Win32::OLE; |
|
53
|
0
|
|
|
|
|
|
import Win32::SAPI4; |
|
54
|
0
|
|
|
|
|
|
import Win32::OLE; |
|
55
|
0
|
|
|
|
|
|
my $vt = Win32::SAPI4::VoiceText->new(); |
|
56
|
0
|
|
|
|
|
|
$self->voicetext($vt); |
|
57
|
0
|
|
|
|
|
|
$result = 1; |
|
58
|
|
|
|
|
|
|
} else { |
|
59
|
0
|
|
|
|
|
|
$self->speech_synthesis_error("Can't enable speech synthesis because the required support module Win32::SAPI4 is not installed."); |
|
60
|
|
|
|
|
|
|
} |
|
61
|
0
|
|
|
|
|
|
return $self->speaking($result); |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub test_speech_recognition { |
|
65
|
|
|
|
|
|
|
# |
|
66
|
|
|
|
|
|
|
# interactively debug recognition engine |
|
67
|
|
|
|
|
|
|
# |
|
68
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
|
69
|
0
|
0
|
|
|
|
|
if (find_module('Win32::SAPI4')) { |
|
70
|
0
|
|
|
|
|
|
require Win32::SAPI4; |
|
71
|
0
|
|
|
|
|
|
require Win32::OLE; |
|
72
|
0
|
|
|
|
|
|
import Win32::SAPI4; |
|
73
|
0
|
|
|
|
|
|
import Win32::OLE; |
|
74
|
|
|
|
|
|
|
|
|
75
|
0
|
|
|
|
|
|
my $done = 0; |
|
76
|
|
|
|
|
|
|
$SIG{INT} = sub { |
|
77
|
0
|
|
|
0
|
|
|
print "[caught interrupt]\n"; |
|
78
|
0
|
|
|
|
|
|
$done = 1; |
|
79
|
0
|
|
|
|
|
|
}; |
|
80
|
|
|
|
|
|
|
|
|
81
|
0
|
|
|
|
|
|
my %stop_words = map {$_, 1} qw( |
|
|
0
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
stop |
|
83
|
|
|
|
|
|
|
quit |
|
84
|
|
|
|
|
|
|
exit |
|
85
|
|
|
|
|
|
|
finish |
|
86
|
|
|
|
|
|
|
done |
|
87
|
|
|
|
|
|
|
); |
|
88
|
|
|
|
|
|
|
|
|
89
|
0
|
|
|
|
|
|
my $comment_tag = '==>'; |
|
90
|
|
|
|
|
|
|
my $event_monitor = sub { |
|
91
|
0
|
|
|
0
|
|
|
my ($obj, $event, @args) = @_; |
|
92
|
0
|
|
|
|
|
|
printf "%s\n", join ",", $event, @args; |
|
93
|
0
|
0
|
0
|
|
|
|
$done = 1 if $event eq "PhraseFinish" and exists $stop_words{$args[1]}; |
|
94
|
0
|
|
|
|
|
|
}; |
|
95
|
|
|
|
|
|
|
|
|
96
|
0
|
|
|
|
|
|
my $vd = Win32::SAPI4::VoiceDictation->new(); |
|
97
|
0
|
|
|
|
|
|
Win32::OLE->WithEvents($vd, $event_monitor); |
|
98
|
0
|
|
|
|
|
|
my $mode = $vd->Mode; |
|
99
|
0
|
|
|
|
|
|
printf "Speech recognition mode is %d (%s)\n", $mode, describe_dictation_mode($mode); |
|
100
|
|
|
|
|
|
|
|
|
101
|
0
|
0
|
|
|
|
|
if (is_dictation_enabled($vd)) { |
|
102
|
0
|
|
|
|
|
|
printf "%s Say \"stop\", \"quit\", \"exit\" or hit ctrl-c to finish.\n\n", $comment_tag; |
|
103
|
|
|
|
|
|
|
|
|
104
|
0
|
|
|
|
|
|
$vd->Deactivate(); |
|
105
|
0
|
|
|
|
|
|
$vd->Activate(); |
|
106
|
|
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
|
while (!$done) { |
|
108
|
0
|
|
|
|
|
|
Win32::OLE->SpinMessageLoop(); |
|
109
|
0
|
|
|
|
|
|
Win32::Sleep(20); |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
|
$vd->Deactivate(); |
|
113
|
|
|
|
|
|
|
} else { |
|
114
|
0
|
|
|
|
|
|
printf "PROBLEM: speech recognition won't work because dictation is not enabled!\n"; |
|
115
|
0
|
|
|
|
|
|
print "Run the Microsoft Dictation application and enable it.\n"; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub describe_dictation_mode { |
|
121
|
|
|
|
|
|
|
# static |
|
122
|
0
|
|
|
0
|
0
|
|
my ($mode) = @_; |
|
123
|
0
|
|
|
|
|
|
my $desc; |
|
124
|
0
|
0
|
|
|
|
|
if ($mode == VSRMODE_DISABLED) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
$desc = "disabled"; |
|
126
|
|
|
|
|
|
|
} elsif ($mode == VSRMODE_OFF) { |
|
127
|
0
|
|
|
|
|
|
$desc = "off"; |
|
128
|
|
|
|
|
|
|
} elsif ($mode == VSRMODE_CMDPAUSED) { |
|
129
|
0
|
|
|
|
|
|
$desc = "paused"; |
|
130
|
|
|
|
|
|
|
} elsif ($mode == VSRMODE_CMDONLY) { |
|
131
|
0
|
|
|
|
|
|
$desc = "voice commands only"; |
|
132
|
|
|
|
|
|
|
} elsif ($mode == VSRMODE_DCTONLY) { |
|
133
|
0
|
|
|
|
|
|
$desc = "dictation only"; |
|
134
|
|
|
|
|
|
|
} elsif ($mode == VSRMODE_CMDANDDCT) { |
|
135
|
0
|
|
|
|
|
|
$desc = "voice commands and dictation"; |
|
136
|
|
|
|
|
|
|
} else { |
|
137
|
0
|
|
|
|
|
|
$desc = "UNKNOWN (??)"; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
0
|
|
|
|
|
|
return $desc; |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub speak { |
|
143
|
0
|
|
|
0
|
0
|
|
my ($self, $string, %options) = @_; |
|
144
|
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
|
$string =~ s/([A-Z]\w+ )(I+)/$1 . length($2)/e; |
|
|
0
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# convert Roman numerals |
|
147
|
|
|
|
|
|
|
# "Zork I" should be pronounced "Zork 1" rather than "Zork Eye" |
|
148
|
|
|
|
|
|
|
|
|
149
|
0
|
|
|
|
|
|
$string =~ s/(\w+ \d+ \/ Serial number )(\d\d)(\d\d)(\d\d)/$1 $2 $3 $4/i; |
|
150
|
|
|
|
|
|
|
# pronounce serial numbers for what they mean: YY MM DD |
|
151
|
|
|
|
|
|
|
# "86 09 04" rather than "860,904" |
|
152
|
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
$string =~ s/Infocom/InfoCom/; |
|
154
|
|
|
|
|
|
|
# phonetic hack to aid SAPI pronunciation; "info-cum" => "info-com" |
|
155
|
|
|
|
|
|
|
|
|
156
|
0
|
|
|
|
|
|
$string =~ s/^Copyright \(c\)/Copyright/; |
|
157
|
|
|
|
|
|
|
# just "copyright", not "copyright (c)" |
|
158
|
|
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
$string =~ s/^\[(.*)\]$/$1/; |
|
160
|
|
|
|
|
|
|
# don't pronounce the brackets around asides, e.g. |
|
161
|
|
|
|
|
|
|
# "[I don't know the work "bogus".]" |
|
162
|
|
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
$string =~ s/\.{3,}/\. /; |
|
164
|
|
|
|
|
|
|
# sometimes pronounces this as "point", e.g. |
|
165
|
|
|
|
|
|
|
# "If you insist.... Poof, you're dead!" |
|
166
|
|
|
|
|
|
|
|
|
167
|
0
|
|
|
|
|
|
$string =~ s/\"$//; |
|
168
|
|
|
|
|
|
|
# prevent pronouncing literal "quote" at end, e.g. Zork 1 reading leaflet |
|
169
|
|
|
|
|
|
|
|
|
170
|
0
|
0
|
|
|
|
|
return if $string eq ">"; |
|
171
|
|
|
|
|
|
|
# don't speak game prompt, HACK |
|
172
|
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
|
my $vt = $self->voicetext(); |
|
174
|
|
|
|
|
|
|
|
|
175
|
0
|
|
0
|
|
|
|
my $gender = $options{"-gender"} || 1; |
|
176
|
|
|
|
|
|
|
# my $i = $vt->find("Mfg=Microsoft;Gender=2;ModeName=Sam"); |
|
177
|
0
|
|
|
|
|
|
my $i = $vt->find("Gender=$gender"); |
|
178
|
0
|
|
|
|
|
|
$vt->Select($i); |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# $vt->Speed(200); |
|
181
|
|
|
|
|
|
|
# why doesn't this method work? |
|
182
|
|
|
|
|
|
|
|
|
183
|
0
|
|
|
|
|
|
$self->update(); |
|
184
|
|
|
|
|
|
|
|
|
185
|
0
|
|
|
|
|
|
my $speech_done = 0; |
|
186
|
|
|
|
|
|
|
my $finish_callback = sub { |
|
187
|
0
|
|
|
0
|
|
|
my ($obj, $event, @args) = @_; |
|
188
|
0
|
0
|
|
|
|
|
$speech_done = 1 if $event eq "SpeakingDone"; |
|
189
|
|
|
|
|
|
|
# wait for event signalling speech output is complete |
|
190
|
0
|
|
|
|
|
|
}; |
|
191
|
|
|
|
|
|
|
|
|
192
|
0
|
|
|
|
|
|
Win32::OLE->WithEvents($vt, $finish_callback); |
|
193
|
0
|
|
|
|
|
|
$vt->Speak($string); |
|
194
|
|
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
|
while (!$speech_done) { |
|
196
|
|
|
|
|
|
|
# wait for speech output to finish |
|
197
|
0
|
|
|
|
|
|
Win32::OLE->SpinMessageLoop(); |
|
198
|
0
|
|
|
|
|
|
Win32::Sleep(20); |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub init_speech_recognition { |
|
203
|
|
|
|
|
|
|
# |
|
204
|
|
|
|
|
|
|
# try to enable speech input, return success or failure |
|
205
|
|
|
|
|
|
|
# |
|
206
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
|
207
|
0
|
|
|
|
|
|
my $result = 0; |
|
208
|
0
|
0
|
|
|
|
|
if (find_module('Win32::SAPI4')) { |
|
209
|
0
|
|
|
|
|
|
require Win32::SAPI4; |
|
210
|
0
|
|
|
|
|
|
require Win32::OLE; |
|
211
|
0
|
|
|
|
|
|
import Win32::SAPI4; |
|
212
|
0
|
|
|
|
|
|
import Win32::OLE; |
|
213
|
|
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
|
my $vd = Win32::SAPI4::VoiceDictation->new(); |
|
215
|
0
|
|
|
|
|
|
$vd->Deactivate(); |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
$SIG{INT} = sub { |
|
218
|
0
|
|
|
0
|
|
|
$vd->Deactivate(); |
|
219
|
0
|
|
|
|
|
|
}; |
|
220
|
|
|
|
|
|
|
|
|
221
|
0
|
|
|
|
|
|
$self->recognized_phrases([]); |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
my $event_monitor = sub { |
|
224
|
0
|
|
|
0
|
|
|
my ($obj, $event, @args) = @_; |
|
225
|
|
|
|
|
|
|
# printf "%s\n", join ",", $event, @args; |
|
226
|
0
|
0
|
|
|
|
|
if ($event eq "PhraseFinish") { |
|
227
|
0
|
|
|
|
|
|
push @{$self->recognized_phrases}, $args[1]; |
|
|
0
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
} |
|
229
|
0
|
|
|
|
|
|
}; |
|
230
|
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
|
Win32::OLE->WithEvents($vd, $event_monitor); |
|
232
|
|
|
|
|
|
|
|
|
233
|
0
|
|
|
|
|
|
my $mode = $vd->Mode; |
|
234
|
0
|
0
|
|
|
|
|
if (is_dictation_enabled($vd)) { |
|
235
|
|
|
|
|
|
|
# OK to proceed |
|
236
|
0
|
|
|
|
|
|
$self->voice_dictation($vd); |
|
237
|
0
|
|
|
|
|
|
$vd->Activate(); |
|
238
|
0
|
|
|
|
|
|
$result = 1; |
|
239
|
|
|
|
|
|
|
} else { |
|
240
|
0
|
|
|
|
|
|
$self->speech_recognition_error("Can't enable speech recognition because dictation is not enabled. Run Microsoft Dictation and enable it, then try again."); |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
} else { |
|
243
|
0
|
|
|
|
|
|
$self->speech_recognition_error("Can't enable speech recognition because the required support module Win32::SAPI4 is not installed."); |
|
244
|
|
|
|
|
|
|
} |
|
245
|
0
|
|
|
|
|
|
return $self->listening($result); |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub recognize_line { |
|
249
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
|
250
|
0
|
0
|
|
|
|
|
unless ($self->recog_dictionary_setup()) { |
|
251
|
0
|
0
|
|
|
|
|
if (my $zd = Games::Rezrov::StoryFile::get_zdict()) { |
|
252
|
0
|
|
|
|
|
|
my @all_words = keys %{$zd->decoded_by_word}; |
|
|
0
|
|
|
|
|
|
|
|
253
|
0
|
|
|
|
|
|
$self->voice_dictation->Words(join " ", @all_words); |
|
254
|
|
|
|
|
|
|
# feed game dictionary into recognition engine as hints. |
|
255
|
|
|
|
|
|
|
# Pros: allows engine to recognize odds words like "grue". |
|
256
|
|
|
|
|
|
|
# Cons: can reveal internally-truncated words ("mailbo" instead of "mailbox"). |
|
257
|
0
|
|
|
|
|
|
$self->recog_dictionary_setup(1); |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
261
|
0
|
|
|
|
|
|
$self->recognized_phrases([]); |
|
262
|
0
|
|
|
|
|
|
while (1) { |
|
263
|
0
|
0
|
|
|
|
|
if (is_dictation_enabled($_[0]->voice_dictation)) { |
|
264
|
|
|
|
|
|
|
# dictation still enabled, wait for user to say something |
|
265
|
0
|
|
|
|
|
|
my $phrases = $self->recognized_phrases(); |
|
266
|
0
|
0
|
|
|
|
|
if (@{$phrases}) { |
|
|
0
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
$self->recognized_phrases([]); |
|
268
|
0
|
|
|
|
|
|
return $phrases->[$#$phrases]; |
|
269
|
|
|
|
|
|
|
} else { |
|
270
|
0
|
|
|
|
|
|
Win32::OLE->SpinMessageLoop(); |
|
271
|
0
|
|
|
|
|
|
Win32::Sleep(20); |
|
272
|
|
|
|
|
|
|
} |
|
273
|
|
|
|
|
|
|
} else { |
|
274
|
|
|
|
|
|
|
# dictation no longer enabled, so break and return control to keyboard |
|
275
|
0
|
|
|
|
|
|
$self->write_string("[dictation stopped]"); |
|
276
|
0
|
|
|
|
|
|
$self->listening(0); |
|
277
|
0
|
|
|
|
|
|
return ""; |
|
278
|
|
|
|
|
|
|
} |
|
279
|
|
|
|
|
|
|
} |
|
280
|
|
|
|
|
|
|
} |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub is_dictation_enabled { |
|
283
|
|
|
|
|
|
|
# STATIC |
|
284
|
0
|
|
|
0
|
0
|
|
my ($vd) = @_; |
|
285
|
0
|
0
|
|
|
|
|
die unless $vd; |
|
286
|
0
|
|
|
|
|
|
my $mode = $vd->Mode; |
|
287
|
0
|
0
|
|
|
|
|
return $mode == VSRMODE_DCTONLY or $mode == VSRMODE_CMDANDDCT; |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
1; |