| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package ZMachine::ZSCII 0.005; |
|
2
|
1
|
|
|
1
|
|
134790
|
use 5.14.0; |
|
|
1
|
|
|
|
|
6
|
|
|
3
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
22
|
|
|
4
|
|
|
|
|
|
|
# ABSTRACT: an encoder/decoder for Z-Machine text |
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
4
|
use Carp (); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
14
|
|
|
7
|
1
|
|
|
1
|
|
3
|
use charnames ':full'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
15
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
#pod =head1 OVERVIEW |
|
10
|
|
|
|
|
|
|
#pod |
|
11
|
|
|
|
|
|
|
#pod ZMachine::ZSCII is a class for objects that are encoders/decoders of Z-Machine |
|
12
|
|
|
|
|
|
|
#pod text. Right now, ZMachine::ZSCII only implements Version 5 (and thus 7 and 8), |
|
13
|
|
|
|
|
|
|
#pod and even that partially. There is no abbreviation support yet. |
|
14
|
|
|
|
|
|
|
#pod |
|
15
|
|
|
|
|
|
|
#pod =head2 How Z-Machine Text Works |
|
16
|
|
|
|
|
|
|
#pod |
|
17
|
|
|
|
|
|
|
#pod The Z-Machine's text strings are composed of ZSCII characters. There are 1024 |
|
18
|
|
|
|
|
|
|
#pod ZSCII codepoints, although only bottom eight bits worth are ever used. |
|
19
|
|
|
|
|
|
|
#pod Codepoints 0x20 through 0x7E are identical with the same codepoints in ASCII or |
|
20
|
|
|
|
|
|
|
#pod Unicode. |
|
21
|
|
|
|
|
|
|
#pod |
|
22
|
|
|
|
|
|
|
#pod ZSCII codepoints are then encoded as strings of five-bit Z-characters. The |
|
23
|
|
|
|
|
|
|
#pod most common ZSCII characters, the lowercase English alphabet, can be encoded |
|
24
|
|
|
|
|
|
|
#pod with one Z-character. Uppercase letters, numbers, and common punctuation |
|
25
|
|
|
|
|
|
|
#pod ZSCII characters require two Z-characters each. Any other ZSCII character can |
|
26
|
|
|
|
|
|
|
#pod be encoded with four Z-characters. |
|
27
|
|
|
|
|
|
|
#pod |
|
28
|
|
|
|
|
|
|
#pod For storage on disk or in memory, the five-bit Z-characters are packed |
|
29
|
|
|
|
|
|
|
#pod together, three in a word, and laid out in bytestrings. The last word in a |
|
30
|
|
|
|
|
|
|
#pod string has its top bit set to mark the ending. When a bytestring would end |
|
31
|
|
|
|
|
|
|
#pod with out enough Z-characters to pack a full word, it is padded. |
|
32
|
|
|
|
|
|
|
#pod (ZMachine::ZSCII pads with Z-character 0x05, a shift character.) |
|
33
|
|
|
|
|
|
|
#pod |
|
34
|
|
|
|
|
|
|
#pod Later versions of the Z-Machine allow the mapping of ZSCII codepoints to |
|
35
|
|
|
|
|
|
|
#pod Unicode codepoints to be customized. ZMachine::ZSCII does not yet support this |
|
36
|
|
|
|
|
|
|
#pod feature. |
|
37
|
|
|
|
|
|
|
#pod |
|
38
|
|
|
|
|
|
|
#pod ZMachine::ZSCII I allow conversion between all four relevant |
|
39
|
|
|
|
|
|
|
#pod representations: Unicode text, ZSCII text, Z-character strings, and packed |
|
40
|
|
|
|
|
|
|
#pod Z-character bytestrings. All four forms are represented by Perl strings. |
|
41
|
|
|
|
|
|
|
#pod |
|
42
|
|
|
|
|
|
|
#pod =cut |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my %DEFAULT_ZSCII = ( |
|
45
|
|
|
|
|
|
|
chr(0x00) => "\N{NULL}", |
|
46
|
|
|
|
|
|
|
chr(0x08) => "\N{DELETE}", |
|
47
|
|
|
|
|
|
|
chr(0x0D) => "\x0D", |
|
48
|
|
|
|
|
|
|
chr(0x1B) => "\N{ESCAPE}", |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
(map {; chr $_ => chr $_ } (0x20 .. 0x7E)), # ASCII maps over |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# 0x09B - 0x0FB are the "extra characters" and need Unicode translation table |
|
53
|
|
|
|
|
|
|
# 0x0FF - 0x3FF are undefined and never (?) used |
|
54
|
|
|
|
|
|
|
); |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# We can use these characters below because they all (save for the magic A2-C6) |
|
57
|
|
|
|
|
|
|
# are the same in Unicode/ASCII/ZSCII. -- rjbs, 2013-01-18 |
|
58
|
|
|
|
|
|
|
my $DEFAULT_ALPHABET = join(q{}, |
|
59
|
|
|
|
|
|
|
'a' .. 'z', # A0 |
|
60
|
|
|
|
|
|
|
'A' .. 'Z', # A1 |
|
61
|
|
|
|
|
|
|
( # A2 |
|
62
|
|
|
|
|
|
|
"\0", # special: read 2 chars for 10-bit zscii character |
|
63
|
|
|
|
|
|
|
"\x0D", |
|
64
|
|
|
|
|
|
|
(0 .. 9), |
|
65
|
1
|
|
|
1
|
|
13018
|
do { no warnings 'qw'; qw[ . , ! ? _ # ' " / \ - : ( ) ] }, |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
2551
|
|
|
66
|
|
|
|
|
|
|
), |
|
67
|
|
|
|
|
|
|
); |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
my @DEFAULT_EXTRA = map chr hex, qw( |
|
70
|
|
|
|
|
|
|
E4 F6 FC C4 D6 DC DF BB AB EB EF FF CB CF E1 E9 |
|
71
|
|
|
|
|
|
|
ED F3 FA FD C1 C9 CD D3 DA DD E0 E8 EC F2 F9 C0 |
|
72
|
|
|
|
|
|
|
C8 CC D2 D9 |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
E2 EA EE F4 FB C2 CA CE D4 DB E5 C5 F8 D8 E3 F1 |
|
75
|
|
|
|
|
|
|
F5 C3 D1 D5 E6 C6 E7 C7 FE F0 DE D0 A3 153 152 A1 |
|
76
|
|
|
|
|
|
|
BF |
|
77
|
|
|
|
|
|
|
); |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
sub _validate_alphabet { |
|
80
|
4
|
|
|
4
|
|
7
|
my (undef, $alphabet) = @_; |
|
81
|
|
|
|
|
|
|
|
|
82
|
4
|
50
|
|
|
|
8
|
Carp::croak("alphabet table was not 78 entries long") |
|
83
|
|
|
|
|
|
|
unless length $alphabet == 78; |
|
84
|
|
|
|
|
|
|
|
|
85
|
4
|
50
|
|
|
|
12
|
Carp::carp("alphabet character 52 not set to 0x000") |
|
86
|
|
|
|
|
|
|
unless substr($alphabet, 52, 1) eq chr(0); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Carp::croak("alphabet table contains characters over 0xFF") |
|
89
|
4
|
50
|
|
|
|
38
|
if grep {; ord > 0xFF } split //, $alphabet; |
|
|
312
|
|
|
|
|
389
|
|
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub _shortcuts_for { |
|
93
|
4
|
|
|
4
|
|
9
|
my ($self, $alphabet) = @_; |
|
94
|
|
|
|
|
|
|
|
|
95
|
4
|
|
|
|
|
9
|
$self->_validate_alphabet($alphabet); |
|
96
|
|
|
|
|
|
|
|
|
97
|
4
|
|
|
|
|
20
|
my %shortcut = (q{ } => chr(0)); |
|
98
|
|
|
|
|
|
|
|
|
99
|
4
|
|
|
|
|
12
|
for my $i (0 .. 2) { |
|
100
|
12
|
|
|
|
|
18
|
my $offset = $i * 26; |
|
101
|
12
|
100
|
|
|
|
21
|
my $prefix = $i ? chr(0x03 + $i) : ''; |
|
102
|
|
|
|
|
|
|
|
|
103
|
12
|
|
|
|
|
15
|
for my $j (0 .. 25) { |
|
104
|
312
|
100
|
100
|
|
|
506
|
next if $i == 2 and $j == 0; # that guy is magic! -- rjbs, 2013-01-18 |
|
105
|
|
|
|
|
|
|
|
|
106
|
308
|
|
|
|
|
510
|
$shortcut{ substr($alphabet, $offset + $j, 1) } = $prefix . chr($j + 6); |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
4
|
|
|
|
|
10
|
return \%shortcut; |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
#pod =method new |
|
114
|
|
|
|
|
|
|
#pod |
|
115
|
|
|
|
|
|
|
#pod my $z = ZMachine::ZSCII->new; |
|
116
|
|
|
|
|
|
|
#pod my $z = ZMachine::ZSCII->new(\%arg); |
|
117
|
|
|
|
|
|
|
#pod my $z = ZMachine::ZSCII->new($version); |
|
118
|
|
|
|
|
|
|
#pod |
|
119
|
|
|
|
|
|
|
#pod This returns a new codec. If the only argument is a number, it is treated as a |
|
120
|
|
|
|
|
|
|
#pod version specification. If no arguments are given, a Version 5 codec is made. |
|
121
|
|
|
|
|
|
|
#pod |
|
122
|
|
|
|
|
|
|
#pod Valid named arguments are: |
|
123
|
|
|
|
|
|
|
#pod |
|
124
|
|
|
|
|
|
|
#pod =begin :list |
|
125
|
|
|
|
|
|
|
#pod |
|
126
|
|
|
|
|
|
|
#pod = version |
|
127
|
|
|
|
|
|
|
#pod |
|
128
|
|
|
|
|
|
|
#pod The number of the Z-Machine targeted; at present, only 5, 7, or 8 are permitted |
|
129
|
|
|
|
|
|
|
#pod values. |
|
130
|
|
|
|
|
|
|
#pod |
|
131
|
|
|
|
|
|
|
#pod = extra_characters |
|
132
|
|
|
|
|
|
|
#pod |
|
133
|
|
|
|
|
|
|
#pod This is a reference to an array of between 0 and 97 Unicode characters. These |
|
134
|
|
|
|
|
|
|
#pod will be the characters to which ZSCII characters 155 through 251. They may not |
|
135
|
|
|
|
|
|
|
#pod duplicate any characters represented by the default ZSCII set. No Unicode |
|
136
|
|
|
|
|
|
|
#pod codepoint above U+FFFF is permitted, as it would not be representable in the |
|
137
|
|
|
|
|
|
|
#pod Z-Machine Unicode substitution table. |
|
138
|
|
|
|
|
|
|
#pod |
|
139
|
|
|
|
|
|
|
#pod If no extra characters are given, the default table is used. |
|
140
|
|
|
|
|
|
|
#pod |
|
141
|
|
|
|
|
|
|
#pod = alphabet |
|
142
|
|
|
|
|
|
|
#pod |
|
143
|
|
|
|
|
|
|
#pod This is a string of 78 characters, representing the three 26-character |
|
144
|
|
|
|
|
|
|
#pod alphabets used to encode ZSCII compactly into Z-characters. The first 26 |
|
145
|
|
|
|
|
|
|
#pod characters are alphabet 0, for the most common characters. The rest of the |
|
146
|
|
|
|
|
|
|
#pod characters are alphabets 1 and 2. |
|
147
|
|
|
|
|
|
|
#pod |
|
148
|
|
|
|
|
|
|
#pod No character with a ZSCII value greater than 0xFF may be included in the |
|
149
|
|
|
|
|
|
|
#pod alphabet. Character 52 (A2's first character) should be NUL. |
|
150
|
|
|
|
|
|
|
#pod |
|
151
|
|
|
|
|
|
|
#pod If no alphabet is given, the default alphabet is used. |
|
152
|
|
|
|
|
|
|
#pod |
|
153
|
|
|
|
|
|
|
#pod = alphabet_is_unicode |
|
154
|
|
|
|
|
|
|
#pod |
|
155
|
|
|
|
|
|
|
#pod By default, the values in the C are assumed to be ZSCII characters, |
|
156
|
|
|
|
|
|
|
#pod so that the contents of the alphabet table from the Z-Machine's memory can be |
|
157
|
|
|
|
|
|
|
#pod used directly. The C option specifies that the characters |
|
158
|
|
|
|
|
|
|
#pod in the alphabet string are Unicode characters. They will be converted to ZSCII |
|
159
|
|
|
|
|
|
|
#pod internally by the C method, and if characters appear in the |
|
160
|
|
|
|
|
|
|
#pod alphabet that are not in the default ZSCII set or the extra characters, an |
|
161
|
|
|
|
|
|
|
#pod exception will be raised. |
|
162
|
|
|
|
|
|
|
#pod |
|
163
|
|
|
|
|
|
|
#pod =end :list |
|
164
|
|
|
|
|
|
|
#pod |
|
165
|
|
|
|
|
|
|
#pod =cut |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub new { |
|
168
|
5
|
|
|
5
|
1
|
6523
|
my ($class, $arg) = @_; |
|
169
|
|
|
|
|
|
|
|
|
170
|
5
|
50
|
|
|
|
15
|
if (! defined $arg) { |
|
171
|
0
|
|
|
|
|
0
|
$arg = { version => 5 }; |
|
172
|
5
|
100
|
|
|
|
13
|
} if (! ref $arg) { |
|
173
|
2
|
|
|
|
|
5
|
$arg = { version => $arg }; |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
|
|
176
|
5
|
|
|
|
|
13
|
my $guts = { version => $arg->{version} }; |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Carp::croak("only Version 5, 7, and 8 ZSCII are supported at present") |
|
179
|
|
|
|
|
|
|
unless $guts->{version} == 5 |
|
180
|
|
|
|
|
|
|
or $guts->{version} == 7 |
|
181
|
5
|
50
|
66
|
|
|
96
|
or $guts->{version} == 8; |
|
|
|
|
66
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
|
183
|
4
|
|
|
|
|
220
|
$guts->{zscii} = { %DEFAULT_ZSCII }; |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# Why is this an arrayref and not, like alphabets, a string? |
|
186
|
|
|
|
|
|
|
# Alphabets are strings because they're guaranteed to fit in bytestrings. |
|
187
|
|
|
|
|
|
|
# You can't put a ZSCII character over 0xFF in the alphabet, because it can't |
|
188
|
|
|
|
|
|
|
# be put in the story file's alphabet table! By using a string, it's easy to |
|
189
|
|
|
|
|
|
|
# just pass in the alphabet from memory to/from the codec. On the other |
|
190
|
|
|
|
|
|
|
# hand, the Unicode translation table stores Unicode codepoint values packed |
|
191
|
|
|
|
|
|
|
# into words, and it's not a good fit for use in the codec. Maybe a |
|
192
|
|
|
|
|
|
|
# ZMachine::Util will be useful for packing/unpacking Unicode translation |
|
193
|
|
|
|
|
|
|
# tables. |
|
194
|
|
|
|
|
|
|
$guts->{extra} = $arg->{extra_characters} |
|
195
|
4
|
|
100
|
|
|
24
|
|| \@DEFAULT_EXTRA; |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Carp::confess("Unicode translation table exceeds maximum length of 97") |
|
198
|
4
|
50
|
|
|
|
9
|
if @{ $guts->{extra} } > 97; |
|
|
4
|
|
|
|
|
11
|
|
|
199
|
|
|
|
|
|
|
|
|
200
|
4
|
|
|
|
|
6
|
for (0 .. $#{ $guts->{extra} }) { |
|
|
4
|
|
|
|
|
12
|
|
|
201
|
|
|
|
|
|
|
Carp::confess("tried to add ambiguous Z->U mapping") |
|
202
|
78
|
50
|
|
|
|
121
|
if exists $guts->{zscii}{ chr(155 + $_) }; |
|
203
|
|
|
|
|
|
|
|
|
204
|
78
|
|
|
|
|
91
|
my $u_char = $guts->{extra}[$_]; |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# Extra characters must go into the Unicode substitution table, which can |
|
207
|
|
|
|
|
|
|
# only represent characters with codepoints between 0 and 0xFFFF. See |
|
208
|
|
|
|
|
|
|
# Z-Machine Spec v1.1 ยง 3.8.4.2.1 |
|
209
|
78
|
50
|
|
|
|
109
|
Carp::confess("tried to add Unicode codepoint greater than U+FFFF") |
|
210
|
|
|
|
|
|
|
if ord($u_char) > 0xFFFF; |
|
211
|
|
|
|
|
|
|
|
|
212
|
78
|
|
|
|
|
142
|
$guts->{zscii}{ chr(155 + $_) } = $u_char; |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
|
|
215
|
4
|
|
|
|
|
12
|
$guts->{zscii_for} = { }; |
|
216
|
4
|
|
|
|
|
8
|
for my $zscii_char (sort keys %{ $guts->{zscii} }) { |
|
|
4
|
|
|
|
|
127
|
|
|
217
|
474
|
|
|
|
|
494
|
my $unicode_char = $guts->{zscii}{$zscii_char}; |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Carp::confess("tried to add ambiguous U->Z mapping") |
|
220
|
474
|
50
|
|
|
|
587
|
if exists $guts->{zscii_for}{ $unicode_char }; |
|
221
|
|
|
|
|
|
|
|
|
222
|
474
|
|
|
|
|
638
|
$guts->{zscii_for}{ $unicode_char } = $zscii_char; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
|
|
225
|
4
|
|
|
|
|
20
|
my $self = bless $guts => $class; |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# The default alphabet is entirely made up of characters that are the same in |
|
228
|
|
|
|
|
|
|
# Unicode and ZSCII. If a user wants to put "extra characters" into the |
|
229
|
|
|
|
|
|
|
# alphabet table, though, the alphabet should contain ZSCII values. When |
|
230
|
|
|
|
|
|
|
# we're building a ZMachine::ZSCII using the contents of the story file's |
|
231
|
|
|
|
|
|
|
# alphabet table, that's easy. If we're building a codec to *produce* a |
|
232
|
|
|
|
|
|
|
# story file, it's less trivial, because we don't want to think about the |
|
233
|
|
|
|
|
|
|
# specific ZSCII codepoints for the Unicode text we'll encode. |
|
234
|
|
|
|
|
|
|
# |
|
235
|
|
|
|
|
|
|
# We provide alphabet_is_unicode to let the user say "my alphabet is supplied |
|
236
|
|
|
|
|
|
|
# in Unicode, please convert it to ZSCII during construction." -- rjbs, |
|
237
|
|
|
|
|
|
|
# 2013-01-19 |
|
238
|
4
|
|
66
|
|
|
13
|
my $alphabet = $arg->{alphabet} || $DEFAULT_ALPHABET; |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# It's okay if the user supplies alphabet_is_unicode but not alphabet, |
|
241
|
|
|
|
|
|
|
# because the default alphabet is all characters with the same value in both |
|
242
|
|
|
|
|
|
|
# character sets! -- rjbs, 2013-01-20 |
|
243
|
|
|
|
|
|
|
$alphabet = $self->unicode_to_zscii($alphabet) |
|
244
|
4
|
100
|
|
|
|
10
|
if $arg->{alphabet_is_unicode}; |
|
245
|
|
|
|
|
|
|
|
|
246
|
4
|
|
|
|
|
10
|
$self->{alphabet} = $alphabet; |
|
247
|
4
|
|
|
|
|
9
|
$self->{shortcut} = $class->_shortcuts_for( $self->{alphabet} ); |
|
248
|
|
|
|
|
|
|
|
|
249
|
4
|
|
|
|
|
11
|
return $self; |
|
250
|
|
|
|
|
|
|
} |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
#pod =method encode |
|
253
|
|
|
|
|
|
|
#pod |
|
254
|
|
|
|
|
|
|
#pod my $packed_zchars = $z->encode( $unicode_text ); |
|
255
|
|
|
|
|
|
|
#pod |
|
256
|
|
|
|
|
|
|
#pod This method takes a string of text and encodes it to a bytestring of packed |
|
257
|
|
|
|
|
|
|
#pod Z-characters. |
|
258
|
|
|
|
|
|
|
#pod |
|
259
|
|
|
|
|
|
|
#pod Internally, it converts the Unicode text to ZSCII, then to Z-characters, and |
|
260
|
|
|
|
|
|
|
#pod then packs them. Before this processing, any native newline characters (the |
|
261
|
|
|
|
|
|
|
#pod value of C<\n>) are converted to C to match the Z-Machine's use of |
|
262
|
|
|
|
|
|
|
#pod character 0x00D for newline. |
|
263
|
|
|
|
|
|
|
#pod |
|
264
|
|
|
|
|
|
|
#pod =cut |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub encode { |
|
267
|
3
|
|
|
3
|
1
|
1222
|
my ($self, $string) = @_; |
|
268
|
|
|
|
|
|
|
|
|
269
|
3
|
|
|
|
|
11
|
$string =~ s/\n/\x0D/g; |
|
270
|
|
|
|
|
|
|
|
|
271
|
3
|
|
|
|
|
9
|
my $zscii = $self->unicode_to_zscii($string); |
|
272
|
3
|
|
|
|
|
7
|
my $zchars = $self->zscii_to_zchars($zscii); |
|
273
|
|
|
|
|
|
|
|
|
274
|
3
|
|
|
|
|
10
|
return $self->pack_zchars($zchars); |
|
275
|
|
|
|
|
|
|
} |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
#pod =method decode |
|
278
|
|
|
|
|
|
|
#pod |
|
279
|
|
|
|
|
|
|
#pod my $text = $z->decode( $packed_zchars ); |
|
280
|
|
|
|
|
|
|
#pod |
|
281
|
|
|
|
|
|
|
#pod This method takes a bytestring of packed Z-characters and returns a string of |
|
282
|
|
|
|
|
|
|
#pod text. |
|
283
|
|
|
|
|
|
|
#pod |
|
284
|
|
|
|
|
|
|
#pod Internally, it unpacks the Z-characters, converts them to ZSCII, and then |
|
285
|
|
|
|
|
|
|
#pod converts those to Unicode. Any ZSCII characters 0x00D are converted to the |
|
286
|
|
|
|
|
|
|
#pod value of C<\n>. |
|
287
|
|
|
|
|
|
|
#pod |
|
288
|
|
|
|
|
|
|
#pod =cut |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub decode { |
|
291
|
3
|
|
|
3
|
1
|
2010
|
my ($self, $bytestring) = @_; |
|
292
|
|
|
|
|
|
|
|
|
293
|
3
|
|
|
|
|
8
|
my $zchars = $self->unpack_zchars( $bytestring ); |
|
294
|
3
|
|
|
|
|
7
|
my $zscii = $self->zchars_to_zscii( $zchars ); |
|
295
|
3
|
|
|
|
|
7
|
my $unicode = $self->zscii_to_unicode( $zscii ); |
|
296
|
|
|
|
|
|
|
|
|
297
|
3
|
|
|
|
|
10
|
$unicode =~ s/\x0D/\n/g; |
|
298
|
|
|
|
|
|
|
|
|
299
|
3
|
|
|
|
|
9
|
return $unicode; |
|
300
|
|
|
|
|
|
|
} |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
#pod =method unicode_to_zscii |
|
303
|
|
|
|
|
|
|
#pod |
|
304
|
|
|
|
|
|
|
#pod my $zscii_string = $z->unicode_to_zscii( $unicode_string ); |
|
305
|
|
|
|
|
|
|
#pod |
|
306
|
|
|
|
|
|
|
#pod This method converts a Unicode string to a ZSCII string, using the dialect of |
|
307
|
|
|
|
|
|
|
#pod ZSCII for the ZMachine::ZSCII's configuration. |
|
308
|
|
|
|
|
|
|
#pod |
|
309
|
|
|
|
|
|
|
#pod If the Unicode input contains any characters that cannot be mapped to ZSCII, an |
|
310
|
|
|
|
|
|
|
#pod exception is raised. |
|
311
|
|
|
|
|
|
|
#pod |
|
312
|
|
|
|
|
|
|
#pod =cut |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub unicode_to_zscii { |
|
315
|
14
|
|
|
14
|
1
|
7310
|
my ($self, $unicode_text) = @_; |
|
316
|
|
|
|
|
|
|
|
|
317
|
14
|
|
|
|
|
22
|
my $zscii = ''; |
|
318
|
14
|
|
|
|
|
34
|
for (0 .. length($unicode_text) - 1) { |
|
319
|
189
|
|
|
|
|
238
|
my $char = substr $unicode_text, $_, 1; |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
Carp::croak( |
|
322
|
|
|
|
|
|
|
sprintf "no ZSCII character available for Unicode U+%v05X <%s>", |
|
323
|
|
|
|
|
|
|
$char, |
|
324
|
|
|
|
|
|
|
charnames::viacode(ord $char), |
|
325
|
189
|
100
|
|
|
|
310
|
) unless defined( my $zscii_char = $self->{zscii_for}{ $char } ); |
|
326
|
|
|
|
|
|
|
|
|
327
|
188
|
|
|
|
|
228
|
$zscii .= $zscii_char; |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
|
|
330
|
13
|
|
|
|
|
34
|
return $zscii; |
|
331
|
|
|
|
|
|
|
} |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
#pod =method zscii_to_unicode |
|
334
|
|
|
|
|
|
|
#pod |
|
335
|
|
|
|
|
|
|
#pod my $unicode_string = $z->zscii_to_unicode( $zscii_string ); |
|
336
|
|
|
|
|
|
|
#pod |
|
337
|
|
|
|
|
|
|
#pod This method converts a ZSCII string to a Unicode string, using the dialect of |
|
338
|
|
|
|
|
|
|
#pod ZSCII for the ZMachine::ZSCII's configuration. |
|
339
|
|
|
|
|
|
|
#pod |
|
340
|
|
|
|
|
|
|
#pod If the ZSCII input contains any characters that cannot be mapped to Unicode, an |
|
341
|
|
|
|
|
|
|
#pod exception is raised. I
|
|
342
|
|
|
|
|
|
|
#pod replacement character instead.> |
|
343
|
|
|
|
|
|
|
#pod |
|
344
|
|
|
|
|
|
|
#pod =cut |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
sub zscii_to_unicode { |
|
347
|
4
|
|
|
4
|
1
|
474
|
my ($self, $zscii) = @_; |
|
348
|
|
|
|
|
|
|
|
|
349
|
4
|
|
|
|
|
5
|
my $unicode = ''; |
|
350
|
4
|
|
|
|
|
10
|
for (0 .. length($zscii) - 1) { |
|
351
|
49
|
|
|
|
|
52
|
my $char = substr $zscii, $_, 1; |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
Carp::croak( |
|
354
|
|
|
|
|
|
|
sprintf "no Unicode character available for ZSCII %#v05x", $char, |
|
355
|
49
|
50
|
|
|
|
83
|
) unless defined(my $unicode_char = $self->{zscii}{ $char }); |
|
356
|
|
|
|
|
|
|
|
|
357
|
49
|
|
|
|
|
57
|
$unicode .= $unicode_char; |
|
358
|
|
|
|
|
|
|
} |
|
359
|
|
|
|
|
|
|
|
|
360
|
4
|
|
|
|
|
6
|
return $unicode; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
#pod =method zscii_to_zchars |
|
364
|
|
|
|
|
|
|
#pod |
|
365
|
|
|
|
|
|
|
#pod my $zchars = $z->zscii_to_zchars( $zscii_string ); |
|
366
|
|
|
|
|
|
|
#pod |
|
367
|
|
|
|
|
|
|
#pod Given a string of ZSCII characters, this method will return a (unpacked) string |
|
368
|
|
|
|
|
|
|
#pod of Z-characters. |
|
369
|
|
|
|
|
|
|
#pod |
|
370
|
|
|
|
|
|
|
#pod It will raise an exception on ZSCII codepoints that cannot be represented as |
|
371
|
|
|
|
|
|
|
#pod Z-characters, which should not be possible with legal ZSCII. |
|
372
|
|
|
|
|
|
|
#pod |
|
373
|
|
|
|
|
|
|
#pod =cut |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub zscii_to_zchars { |
|
376
|
10
|
|
|
10
|
1
|
5491
|
my ($self, $zscii) = @_; |
|
377
|
|
|
|
|
|
|
|
|
378
|
10
|
50
|
|
|
|
21
|
return '' unless length $zscii; |
|
379
|
|
|
|
|
|
|
|
|
380
|
10
|
|
|
|
|
14
|
my $zchars = ''; |
|
381
|
10
|
|
|
|
|
19
|
for (0 .. length($zscii) - 1) { |
|
382
|
103
|
|
|
|
|
115
|
my $zscii_char = substr($zscii, $_, 1); |
|
383
|
103
|
100
|
|
|
|
159
|
if (defined (my $shortcut = $self->{shortcut}{ $zscii_char })) { |
|
384
|
92
|
|
|
|
|
90
|
$zchars .= $shortcut; |
|
385
|
92
|
|
|
|
|
110
|
next; |
|
386
|
|
|
|
|
|
|
} |
|
387
|
|
|
|
|
|
|
|
|
388
|
11
|
|
|
|
|
16
|
my $ord = ord $zscii_char; |
|
389
|
|
|
|
|
|
|
|
|
390
|
11
|
50
|
|
|
|
18
|
if ($ord >= 1024) { |
|
391
|
0
|
|
|
|
|
0
|
Carp::croak( |
|
392
|
|
|
|
|
|
|
sprintf "can't encode ZSCII codepoint %#v05x in Z-characters", |
|
393
|
|
|
|
|
|
|
$zscii_char |
|
394
|
|
|
|
|
|
|
); |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
|
|
397
|
11
|
|
|
|
|
16
|
my $top = ($ord & 0b1111100000) >> 5; |
|
398
|
11
|
|
|
|
|
13
|
my $bot = ($ord & 0b0000011111); |
|
399
|
|
|
|
|
|
|
|
|
400
|
11
|
|
|
|
|
14
|
$zchars .= "\x05\x06"; # The escape code for a ten-bit ZSCII character. |
|
401
|
11
|
|
|
|
|
22
|
$zchars .= chr($top) . chr($bot); |
|
402
|
|
|
|
|
|
|
} |
|
403
|
|
|
|
|
|
|
|
|
404
|
10
|
|
|
|
|
21
|
return $zchars; |
|
405
|
|
|
|
|
|
|
} |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
#pod =method zchars_to_zscii |
|
408
|
|
|
|
|
|
|
#pod |
|
409
|
|
|
|
|
|
|
#pod my $zscii = $z->zchars_to_zscii( $zchars_string, \%arg ); |
|
410
|
|
|
|
|
|
|
#pod |
|
411
|
|
|
|
|
|
|
#pod Given a string of (unpacked) Z-characters, this method will return a string of |
|
412
|
|
|
|
|
|
|
#pod ZSCII characters. |
|
413
|
|
|
|
|
|
|
#pod |
|
414
|
|
|
|
|
|
|
#pod It will raise an exception when the right thing to do can't be determined. |
|
415
|
|
|
|
|
|
|
#pod Right now, that could mean lots of things. |
|
416
|
|
|
|
|
|
|
#pod |
|
417
|
|
|
|
|
|
|
#pod Valid arguments are: |
|
418
|
|
|
|
|
|
|
#pod |
|
419
|
|
|
|
|
|
|
#pod =begin :list |
|
420
|
|
|
|
|
|
|
#pod |
|
421
|
|
|
|
|
|
|
#pod = allow_early_termination |
|
422
|
|
|
|
|
|
|
#pod |
|
423
|
|
|
|
|
|
|
#pod If C is true, no exception is thrown if the |
|
424
|
|
|
|
|
|
|
#pod Z-character string ends in the middle of a four z-character sequence. This is |
|
425
|
|
|
|
|
|
|
#pod useful when dealing with dictionary words. |
|
426
|
|
|
|
|
|
|
#pod |
|
427
|
|
|
|
|
|
|
#pod =end :list |
|
428
|
|
|
|
|
|
|
#pod |
|
429
|
|
|
|
|
|
|
#pod =cut |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub zchars_to_zscii { |
|
432
|
7
|
|
|
7
|
1
|
2504
|
my ($self, $zchars, $arg) = @_; |
|
433
|
7
|
|
100
|
|
|
30
|
$arg ||= {}; |
|
434
|
|
|
|
|
|
|
|
|
435
|
7
|
|
|
|
|
10
|
my $text = ''; |
|
436
|
7
|
|
|
|
|
10
|
my $alphabet = 0; |
|
437
|
|
|
|
|
|
|
|
|
438
|
7
|
|
|
|
|
21
|
while (length( my $char = substr $zchars, 0, 1, '')) { |
|
439
|
104
|
|
|
|
|
109
|
my $ord = ord $char; |
|
440
|
|
|
|
|
|
|
|
|
441
|
104
|
100
|
|
|
|
143
|
if ($ord == 0) { $text .= q{ }; next; } |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
5
|
|
|
442
|
|
|
|
|
|
|
|
|
443
|
101
|
100
|
|
|
|
158
|
if ($ord == 0x04) { $alphabet = 1; next } |
|
|
7
|
100
|
|
|
|
7
|
|
|
|
7
|
|
|
|
|
12
|
|
|
444
|
23
|
|
|
|
|
25
|
elsif ($ord == 0x05) { $alphabet = 2; next } |
|
|
23
|
|
|
|
|
35
|
|
|
445
|
|
|
|
|
|
|
|
|
446
|
71
|
100
|
100
|
|
|
132
|
if ($alphabet == 2 && $ord == 0x06) { |
|
447
|
12
|
|
|
|
|
20
|
my $next_two = substr $zchars, 0, 2, ''; |
|
448
|
12
|
100
|
|
|
|
18
|
if (length $next_two != 2) { |
|
449
|
2
|
100
|
|
|
|
6
|
last if $arg->{allow_early_termination}; |
|
450
|
1
|
|
|
|
|
124
|
Carp::croak("ten-bit ZSCII encoding segment terminated early") |
|
451
|
|
|
|
|
|
|
} |
|
452
|
|
|
|
|
|
|
|
|
453
|
10
|
|
|
|
|
17
|
my $value = ord(substr $next_two, 0, 1) << 5 |
|
454
|
|
|
|
|
|
|
| ord(substr $next_two, 1, 1); |
|
455
|
|
|
|
|
|
|
|
|
456
|
10
|
|
|
|
|
12
|
$text .= chr $value; |
|
457
|
10
|
|
|
|
|
11
|
$alphabet = 0; |
|
458
|
10
|
|
|
|
|
19
|
next; |
|
459
|
|
|
|
|
|
|
} |
|
460
|
|
|
|
|
|
|
|
|
461
|
59
|
50
|
33
|
|
|
125
|
if ($ord >= 0x06 && $ord <= 0x1F) { |
|
462
|
59
|
|
|
|
|
102
|
$text .= substr $self->{alphabet}, (26 * $alphabet) + $ord - 6, 1; |
|
463
|
59
|
|
|
|
|
58
|
$alphabet = 0; |
|
464
|
59
|
|
|
|
|
96
|
next; |
|
465
|
|
|
|
|
|
|
} |
|
466
|
|
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
0
|
Carp::croak("unknown zchar <$char> encountered in alphabet <$alphabet>"); |
|
468
|
|
|
|
|
|
|
} |
|
469
|
|
|
|
|
|
|
|
|
470
|
6
|
|
|
|
|
16
|
return $text; |
|
471
|
|
|
|
|
|
|
} |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
#pod =method make_dict_length |
|
474
|
|
|
|
|
|
|
#pod |
|
475
|
|
|
|
|
|
|
#pod my $zchars = $z->make_dict_length( $zchars_string ) |
|
476
|
|
|
|
|
|
|
#pod |
|
477
|
|
|
|
|
|
|
#pod This method returns the Z-character string fit to dictionary length for the |
|
478
|
|
|
|
|
|
|
#pod Z-machine version being handled. It will trim excess characters or pad with |
|
479
|
|
|
|
|
|
|
#pod Z-character 5 to be the right length. |
|
480
|
|
|
|
|
|
|
#pod |
|
481
|
|
|
|
|
|
|
#pod When converting such strings back to ZSCII, you should pass the |
|
482
|
|
|
|
|
|
|
#pod C to C, as a four-Z-character |
|
483
|
|
|
|
|
|
|
#pod sequence may have been terminated early. |
|
484
|
|
|
|
|
|
|
#pod |
|
485
|
|
|
|
|
|
|
#pod =cut |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
sub make_dict_length { |
|
488
|
3
|
|
|
3
|
1
|
1424
|
my ($self, $zchars) = @_; |
|
489
|
|
|
|
|
|
|
|
|
490
|
3
|
50
|
|
|
|
8
|
my $length = $self->{version} >= 5 ? 9 : 6; |
|
491
|
3
|
|
|
|
|
7
|
$zchars = substr $zchars, 0, $length; |
|
492
|
3
|
|
|
|
|
7
|
$zchars .= "\x05" x ($length - length($zchars)); |
|
493
|
|
|
|
|
|
|
|
|
494
|
3
|
|
|
|
|
6
|
return $zchars; |
|
495
|
|
|
|
|
|
|
} |
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
#pod =method pack_zchars |
|
498
|
|
|
|
|
|
|
#pod |
|
499
|
|
|
|
|
|
|
#pod my $packed_zchars = $z->pack_zchars( $zchars_string ); |
|
500
|
|
|
|
|
|
|
#pod |
|
501
|
|
|
|
|
|
|
#pod This method takes a string of unpacked Z-characters and packs them into a |
|
502
|
|
|
|
|
|
|
#pod bytestring with three Z-characters per word. The final word will have its top |
|
503
|
|
|
|
|
|
|
#pod bit set. |
|
504
|
|
|
|
|
|
|
#pod |
|
505
|
|
|
|
|
|
|
#pod =cut |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
sub pack_zchars { |
|
508
|
4
|
|
|
4
|
1
|
880
|
my ($self, $zchars) = @_; |
|
509
|
|
|
|
|
|
|
|
|
510
|
4
|
|
|
|
|
6
|
my $bytestring = ''; |
|
511
|
|
|
|
|
|
|
|
|
512
|
4
|
|
|
|
|
13
|
while (my $substr = substr $zchars, 0, 3, '') { |
|
513
|
31
|
|
|
|
|
48
|
$substr .= chr(5) until length $substr == 3; |
|
514
|
|
|
|
|
|
|
|
|
515
|
31
|
|
|
|
|
48
|
my $value = ord(substr($substr, 0, 1)) << 10 |
|
516
|
|
|
|
|
|
|
| ord(substr($substr, 1, 1)) << 5 |
|
517
|
|
|
|
|
|
|
| ord(substr($substr, 2, 1)); |
|
518
|
|
|
|
|
|
|
|
|
519
|
31
|
100
|
|
|
|
44
|
$value |= (0x8000) if ! length $zchars; |
|
520
|
|
|
|
|
|
|
|
|
521
|
31
|
|
|
|
|
69
|
$bytestring .= pack 'n', $value; |
|
522
|
|
|
|
|
|
|
} |
|
523
|
|
|
|
|
|
|
|
|
524
|
4
|
|
|
|
|
10
|
return $bytestring; |
|
525
|
|
|
|
|
|
|
} |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
#pod =method unpack_zchars |
|
528
|
|
|
|
|
|
|
#pod |
|
529
|
|
|
|
|
|
|
#pod my $zchars_string = $z->pack_zchars( $packed_zchars ); |
|
530
|
|
|
|
|
|
|
#pod |
|
531
|
|
|
|
|
|
|
#pod Given a bytestring of packed Z-characters, this method will unpack them into a |
|
532
|
|
|
|
|
|
|
#pod string of unpacked Z-characters that aren't packed anymore because they're |
|
533
|
|
|
|
|
|
|
#pod unpacked instead of packed. |
|
534
|
|
|
|
|
|
|
#pod |
|
535
|
|
|
|
|
|
|
#pod Exceptions are raised if the input bytestring isn't made of an even number of |
|
536
|
|
|
|
|
|
|
#pod octets, or if the string continues past the first word with its top bit set. |
|
537
|
|
|
|
|
|
|
#pod |
|
538
|
|
|
|
|
|
|
#pod =cut |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
sub unpack_zchars { |
|
541
|
5
|
|
|
5
|
1
|
1009
|
my ($self, $bytestring) = @_; |
|
542
|
|
|
|
|
|
|
|
|
543
|
5
|
50
|
|
|
|
16
|
Carp::croak("bytestring of packed zchars is not an even number of bytes") |
|
544
|
|
|
|
|
|
|
if length($bytestring) % 2; |
|
545
|
|
|
|
|
|
|
|
|
546
|
5
|
|
|
|
|
12
|
my $terminate; |
|
547
|
5
|
|
|
|
|
7
|
my $zchars = ''; |
|
548
|
5
|
|
|
|
|
15
|
while (my $word = substr $bytestring, 0, 2, '') { |
|
549
|
|
|
|
|
|
|
# XXX: Probably allow this to warn and `last` -- rjbs, 2013-01-18 |
|
550
|
37
|
50
|
|
|
|
57
|
Carp::croak("input continues after terminating byte") if $terminate; |
|
551
|
|
|
|
|
|
|
|
|
552
|
37
|
|
|
|
|
52
|
my $n = unpack 'n', $word; |
|
553
|
37
|
|
|
|
|
40
|
$terminate = $n & 0x8000; |
|
554
|
|
|
|
|
|
|
|
|
555
|
37
|
|
|
|
|
50
|
my $c1 = chr( ($n & 0b0111110000000000) >> 10 ); |
|
556
|
37
|
|
|
|
|
40
|
my $c2 = chr( ($n & 0b0000001111100000) >> 5 ); |
|
557
|
37
|
|
|
|
|
39
|
my $c3 = chr( ($n & 0b0000000000011111) ); |
|
558
|
|
|
|
|
|
|
|
|
559
|
37
|
|
|
|
|
70
|
$zchars .= "$c1$c2$c3"; |
|
560
|
|
|
|
|
|
|
} |
|
561
|
|
|
|
|
|
|
|
|
562
|
5
|
|
|
|
|
15
|
return $zchars; |
|
563
|
|
|
|
|
|
|
} |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
1; |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
__END__ |