File Coverage

blib/lib/Language/Zcode/Parser/Opcode.pm
Criterion Covered Total %
statement 170 181 93.9
branch 94 106 88.6
condition 28 42 66.6
subroutine 17 17 100.0
pod 0 7 0.0
total 309 353 87.5


line stmt bran cond sub pod time code
1             package Language::Zcode::Parser::Opcode;
2            
3             =head1 NAME
4            
5             Language::Zcode::Parser::Opcode - parse one opcode
6            
7             =head1 DESCRIPTION
8            
9             This package parses one opcode. It uses the syntax described in the
10             Z-spec's table, 14.1. It parses the opcode and its arguments into
11             a hash:
12            
13             =over 4
14            
15             =item opcode
16            
17             Name of the opcode
18            
19             =item opcode_address
20            
21             Byte address of the opcode (in hex)
22            
23             =item args
24            
25             Arguments to a subroutine call
26            
27             =item negate_jump
28            
29             Negates the condition of a branch instruction
30            
31             =item jump_return
32            
33             Return true/false if branch condition is met, instead of jumping
34            
35             =back
36            
37             Other keys are (almost) identical to the arg names in the spec.
38             For example, "je a b ?(label)" yields keys a, b, and label.
39             For example2, word-index is changed to word_index to make my life easier.
40            
41             =cut
42            
43             # Program Counter
44             our $PC;
45            
46             sub parse_sub_header {
47 134     134 0 279 $PC = shift;
48 134         313 my $nl = eat_byte();
49 134 50       334 die "Bad number of locals $nl" if $nl > 15;
50             # skip local variable values
51 134         570 my @locals = (0) x $nl;
52 134 50       682 if ($Language::Zcode::Util::Constants{version} <= 4) {
53 0         0 @locals = map { &eat_word } 1..$nl
  0         0  
54             }
55 134         526 return @locals;
56             }
57            
58             { # Extra scoping brace: doing all this just once instead of each time
59             # (out of thousands) that we call this sub speeds up by several times!
60            
61             ##############3###### Many, many constants here...
62 2     2   10 use constant OP_UNKNOWN => -1;
  2         4  
  2         146  
63 2     2   10 use constant OP_0OP => 0;
  2         4  
  2         280  
64 2     2   11 use constant OP_1OP => 1;
  2         4  
  2         97  
65 2     2   9 use constant OP_2OP => 2;
  2         11  
  2         187  
66 2     2   11 use constant OP_VAR => 3;
  2         5  
  2         103  
67 2     2   10 use constant OP_EXT => 4;
  2         2  
  2         95  
68            
69             # two bits to store operand type: large or small constant, var, or none
70 2     2   9 use constant OP_TYPE_LARGE => 0;
  2         4  
  2         87  
71 2     2   10 use constant OP_TYPE_SMALL => 1;
  2         20  
  2         208  
72 2     2   12 use constant OP_TYPE_VAR => 2;
  2         3  
  2         139  
73 2     2   11 use constant OP_TYPE_DONE => 3; # Also, all remaining ops must also be '11'
  2         3  
  2         7118  
74            
75             my @TYPE_LABELS;
76             $TYPE_LABELS[OP_0OP] = "0OP";
77             $TYPE_LABELS[OP_1OP] = "1OP";
78             $TYPE_LABELS[OP_2OP] = "2OP";
79             $TYPE_LABELS[OP_VAR] = "VAR";
80             $TYPE_LABELS[OP_EXT] = "EXT";
81            
82             # OPCODE TABLES AND INFORM ASSEMBLY SYNTAX TAKEN FROM Z-SPEC
83             # (Minor changes to text, like changing - to _)
84             # Note: if an opcode is only in certain versions, we have a hash.
85             # Keys are 3 for version 3, 3- for versions 3 and over,
86             # 1-4 for versions 1 through 4, and 5:7:8 for versions 5,7,8
87             # (Last one necessary cuz 6 has fancy opcodes that 7 and 8 don't have.)
88             # Zero-operand opcodes 0OP
89             my @zero_ops = (
90             'rtrue', # 0
91             'rfalse', # 1
92             # As far as I can tell, print & print_ret are always b2/b3 (0OP)
93             # so we don't need to read their strings.
94             'print (literal_string)', # 2
95             'print_ret (literal_string)', # 3
96             'nop', # 4
97             # Version 1, version 4
98             { "1-3" => 'save ?(label)', # 5
99             "4" => 'save -> (result)'
100             }, # illegal in v5+
101             { "1-3" => 'restore ?(label)', # 6
102             "4" => 'restore -> (result)'
103             }, # illegal in v5+
104             'restart', # 7
105             'ret_popped', # 8
106             { "1-4" => 'pop',
107             "5-" => 'catch -> (result)', # 9
108             },
109             'quit', # a
110             'new_line', # b
111             { "3" => 'show_status'}, # c (v3 only)
112             { "3-" => 'verify ?(label)'}, # d
113             { "5-" => 'extended'}, # e [byte 1 of extended opcode]
114             { "5-" => 'piracy ?(label)'}, # f
115             );
116            
117             # One-operand opcodes 1OP
118             my @one_ops = (
119             'jz a ?(label)', # 0x00
120             'get_sibling object -> (result) ?(label)', # 0x01
121             'get_child object -> (result) ?(label)', # 0x02
122             'get_parent object -> (result)', # 0x03
123             'get_prop_len property_address -> (result)', # 0x04
124             'inc (variable)', # 0x05
125             'dec (variable)', # 0x06
126             'print_addr byte_address_of_string', # 0x07
127             { "4-" => 'call_1s routine -> (result)'}, # 0x08
128             'remove_obj object', # 0x09
129             'print_obj object', # 0x0a
130             'ret value', # 0x0b
131             'jump ?(label)', # 0x0c
132             'print_paddr packed_address_of_string', # 0x0d
133             'load (variable) -> (result)', # 0x0e
134             { "1-4" => 'not value -> (result)', # 0x0f
135             "5-" => 'call_1n routine',
136             },
137             );
138            
139             # Two-operand opcodes 2OP
140             my @two_ops = (
141             '', # 0x00
142             # XXX Spec says "je a b ?(label)" but je may take up to four (?) test values
143             # (The thing tested and up to 3 to test against)
144             'je a (1-3args) ?(label)', # 0x01
145             'jl a b ?(label)', # 0x02
146             'jg a b ?(label)', # 0x03
147             'dec_chk (variable) value ?(label)', # 0x04
148             'inc_chk (variable) value ?(label)', # 0x05
149             'jin obj1 obj2 ?(label)', # 0x06
150             'test bitmap flags ?(label)', # 0x07
151             'or a b -> (result)', # 0x08
152             'and a b -> (result)', # 0x09
153             'test_attr object attribute ?(label)', # 0x0a
154             'set_attr object attribute', # 0x0b
155             'clear_attr object attribute', # 0x0c
156             'store (variable) value', # 0x0d
157             'insert_obj object destination', # 0x0e
158             'loadw array word_index -> (result)', # 0x0f
159             'loadb array byte_index -> (result)', # 0x10
160             'get_prop object property -> (result)', # 0x11
161             'get_prop_addr object property -> (result)', # 0x12
162             'get_next_prop object property -> (result)', # 0x13
163             'add a b -> (result)', # 0x14
164             'sub a b -> (result)', # 0x15
165             'mul a b -> (result)', # 0x16
166             'div a b -> (result)', # 0x17
167             'mod a b -> (result)', # 0x18
168             { "4-" => 'call_2s routine arg1 -> (result)'}, # 0x19
169             { "5-" => 'call_2n routine arg1'}, # 0x1a
170             { "5:7:8" => 'set_colour foreground background',# 0x1b
171             "6" => 'set_colour foreground background window',
172             },
173             { "5-" => 'throw value stack_frame'}, # 0x1c
174             '', # 0x1d
175             '', # 0x1e
176             '', # 0x1f
177             );
178            
179             # Variable-operand opcodes VAR
180             my @var_ops = (
181             # Versions 1-3 use "call" instead of "call_vs". But aren't they the same?
182             #'call routine (0-3args) -> (result)',
183             'call_vs routine (0-3args) -> (result)', # 0x00
184             'storew array word_index value', # 0x01
185             'storeb array byte_index value', # 0x02
186             'put_prop object property value', # 0x03
187             # (Inform calls them sread/aread, but they're really all read
188             { "1-3" => 'read text parse',
189             "4" => 'read text parse time routine',
190             "5-" => 'read text parse time routine -> (result)', # 0x04
191             },
192             'print_char output_character_code', # 0x05
193             'print_num value', # 0x06
194             'random range -> (result)', # 0x07
195             'push value', # 0x08
196             { "1-5" => 'pull (variable)', # 0x08
197             "6" => 'pull stack -> (result)',
198             "7-9" => 'pull (variable)',
199             },
200             { "3-" => 'split_window lines'}, # 0x0a
201             { "3-" => 'set_window window'}, # 0x0b
202             { "4-" => 'call_vs2 routine (0-7args) -> (result)'}, # 0x0c
203             { "4-" => 'erase_window window'}, # 0x0d
204             # XXX translate_command will get different keys depending on version!
205             # I believe this is the only command for which this happens. All other
206             # commands you just get extra (possibly optional) args.
207             { "4:5:7:8:9" => 'erase_line value', # 0x0e
208             "6" => 'erase_line pixels',
209             },
210             { "4:5:7:8:9" => 'set_cursor line column', # 0x0f
211             "6" => 'set_cursor line column window',
212             },
213            
214             { "4-" => 'get_cursor array'}, # 0x10
215             { "4-" => 'set_text_style style'}, # 0x11
216             { "4-" => 'buffer_mode flag'}, # 0x12
217             { "3-4" => 'output_stream number ', # 0x13
218             "5:7:8" => 'output_stream number table',
219             "6" => 'output_stream number table width',
220             },
221             { "3-" => 'input_stream number'}, # 0x14
222             # Spec says defined in v5, first used in v3?!
223             { "3-" => 'sound_effect number effect volume routine'}, # 0x15
224             { "4-" => 'read_char 1 time routine -> (result)'}, # 0x16
225             { "4-" => 'scan_table x table len form -> (result)'}, # 0x17
226             { "5-" => 'not value -> (result)'}, # 0x18
227             { "5-" => 'call_vn routine (0-3args)'}, # 0x19
228             { "5-" => 'call_vn2 routine (0-7args)'}, # 0x1a
229             { "5-" => 'tokenise text parse dictionary flag'}, # 0x1b
230             { "5-" => 'encode_text zscii_text length from coded_text'}, # 0x1c
231             { "5-" => 'copy_table first second size'}, # 0x1d
232             { "5-" => 'print_table zscii_text width height skip'}, # 0x1e
233             # Bug in spec?! It doesn't list label
234             { "5-" => 'check_arg_count argument_number ?(label)'}, # 0x1f
235             );
236            
237             # Extended opcodes EXT
238             my @ext_ops = (
239             # XXX "table bytes name" are optional. IF we get that many args,
240             # fill in those values, else we just get a result & do a normal save
241             { "5-" => 'save table bytes name -> (result)'}, # 0x00
242             { "5-" => 'restore table bytes name -> (result)'}, # 0x01
243             { "5-" => 'log_shift number places -> (result)'}, # 0x02
244             { "5-" => 'art_shift number places -> (result)'}, # 0x03
245             { "5-" => 'set_font font -> (result)'}, # 0x04
246             { "6" => 'draw_picture picture_number y x'}, # 0x05
247             { "6" => 'picture_data picture_number array ?(label)'}, # 0x06
248             { "6" => 'erase_picture picture_number y x'}, # 0x07
249             { "6" => 'set_margins left right window'}, # 0x08
250             { "5-" => 'save_undo -> (result)'}, # 0x09
251             { "5-" => 'restore_undo -> (result)'}, # 0x0a
252             { "5-" => 'print_unicode char_number'}, # 0x0b
253             { "5-" => 'check_unicode char_number -> (result)'}, # 0x0c
254             '', # 0x0d
255             '', # 0x0e
256             '', # 0x0f
257            
258             { "6" => 'move_window window y x'}, # 0x10
259             { "6" => 'window_size window y x'}, # 0x11
260             { "6" => 'window_style window flags operation'}, # 0x12
261             { "6" => 'get_wind_prop window property_number -> (result)'}, # 0x13
262             { "6" => 'scroll_window window pixels'}, # 0x14
263             { "6" => 'pop_stack items stack'}, # 0x15
264             { "6" => 'read_mouse array'}, # 0x16
265             { "6" => 'mouse_window window'}, # 0x17
266             { "6" => 'push_stack value stack ?(label)'}, # 0x18
267             { "6" => 'put_wind_prop window property_number value'}, # 0x19
268             { "6" => 'print_form formatted_table'}, # 0x1a
269             { "6" => 'make_menu number table ?(label)'}, # 0x1b
270             { "6" => 'picture_table table'}, # 0x1c
271             );
272            
273             my (@generic_opcodes);
274             $generic_opcodes[OP_0OP] = \@zero_ops;
275             $generic_opcodes[OP_1OP] = \@one_ops;
276             $generic_opcodes[OP_2OP] = \@two_ops;
277             $generic_opcodes[OP_VAR] = \@var_ops;
278             $generic_opcodes[OP_EXT] = \@ext_ops;
279            
280             sub parse_command {
281             # See ZMachine spec chapter 4
282            
283             ##################### OK, finally ready to start the real sub
284 2959     2959 0 8054 my %parsed = ( "opcode_address" => $PC );
285 2959         6009 my $z_version = $Language::Zcode::Util::Constants{version};
286            
287 2959         17789 my $opcode = &eat_byte();
288 2959         5259 my $op_style = OP_UNKNOWN;
289 2959         7446 my @operands = ();
290 2959         40487 my $is_var_ops = 0;
291 2959 100 66     20724 if (($opcode & 0x80) == 0) {
    100          
    100          
292             # If top bit is zero: opcode is "long" format, which is always 2OP
293             # ME: Handle these first as they seem to be the most common.
294             # Next two bits give operand types for the two ops
295             # type is small constant (0) or variable number (1)
296 834 100       3068 @operands = (load_operand($opcode&0x40 ? OP_TYPE_VAR : OP_TYPE_SMALL),
    100          
297             load_operand($opcode&0x20 ? OP_TYPE_VAR : OP_TYPE_SMALL));
298 834         1518 $opcode &= 0x1f; # last 5 bits
299 834         3733 $op_style = OP_2OP;
300            
301             } elsif ($opcode & 0x40) {
302             # top 2 bits are both 1: "variable" format opcode. Opcode in bottom 5
303             # bits. This may actually be a 2OP opcode...
304 1077 100       2316 $op_style = $opcode & 0x20 ? OP_VAR : OP_2OP;
305 1077         1507 $opcode &= 0x1f;
306 1077         1669 $is_var_ops = 1; # load operands later
307            
308             } elsif ($opcode == 0xbe && $z_version >= 5) {
309             # "extended" opcode
310 16         33 $opcode = &eat_byte();
311 16         24 $op_style = OP_EXT;
312 16         23 $is_var_ops = 1; # load operands below
313             } else {
314             # "short" format opcode: next two bits mean zero or 1 OP
315 1032 100       2691 if (($opcode & 0x30) == 0x30) {
316 546         903 $op_style = OP_0OP;
317             } else {
318 486         655 $op_style = OP_1OP;
319 486         634 my $optype = ($opcode & 0x30) >> 4;
320 486         847 push @operands, &load_operand($optype);
321             }
322 1032         1926 $opcode &= 0x0f;
323             }
324            
325             # Which command is it?
326 2959 50       10192 my $syntax = $generic_opcodes[$op_style]->[$opcode]
327             or warn("Unknown opcode $TYPE_LABELS[$op_style] $opcode"), return;
328             # Deal with version-dependent codes
329 2959 100       7544 if (ref $syntax eq "HASH") {
330 864         3775 my %syn = %$syntax;
331 864         1983 my $v = $z_version; # nickname for conciseness below
332 864         2474 $syntax = "";
333 864         12808 foreach my $range (keys %syn) {
334 1002 100 66     26250 if (($range =~ /^(\d+)$/ && $v == $1) ||
      66        
      33        
      100        
      100        
      66        
      66        
      66        
335             ($range =~ /^(\d+)-$/ && $v >= $1) ||
336             ($range =~ /^(\d+)-(\d+)$/ && $v >= $1 && $v <= $2) ||
337             # One day there might be a version 10, and v1 shouldn't match...
338             ($range =~ /:/ && $range =~ /\b$v\b/))
339             {
340 864         1846 $syntax = $syn{$range};
341 864         2470 last;
342             }
343             }
344 864 50       3443 if (!$syntax) {
345 0         0 warn("opcode $TYPE_LABELS[$op_style] $opcode illegal for v$v");
346 0         0 return;
347             }
348             }
349 2959         19176 my ($command, @keys) = split " ", $syntax;
350            
351             # Read leftover ops for VAR opcodes
352 2959         4709 my ($operand_types, $i);
353 2959 100       9066 if ($is_var_ops) {
354             # a VAR or EXT opcode with variable argument count.
355             # Load the arguments.
356 1093 100 100     19306 if ($op_style == OP_VAR &&
357             ($command =~ /^call_v[sn]2$/)) {
358             # 4.4.3.1: there may be two bytes of operand types, allowing
359             # for up to 8 arguments. This byte will always be present,
360             # though it does NOT have to be used...
361 16         27 $i = 14;
362             # start shift mask: target "leftmost" 2 bits
363 16         66 $operand_types = &eat_word();
364             } else {
365             # 4.4.3: one byte of operand types, up to 4 args.
366 1077         2379 $i = 6;
367 1077         3591 $operand_types = &eat_byte();
368             }
369             # printf STDERR "%s: ", $operand_types;
370 1093         3082 for (; $i >=0; $i -= 2) {
371 3654         6949 my $optype = ($operand_types >> $i) & 0x03;
372             # print STDERR "$optype\n";
373 3654 100       7682 if (defined (my $op = &load_operand($optype))) {
374 2921         12896 push @operands, $op;
375             } else {
376 733         2034 last; # done getting args
377             }
378             }
379             # print STDERR "\n";
380             }
381            
382             # Read any remaining args if necessary.
383             # Also, assign operands to operand names, creating %parsed
384 2959         10507 $parsed{opcode} = $command;
385             # print "$command @operands\n";
386 2959         5403 for my $key (@keys) {
387 5352 100       12274 next if $key eq "->";
388            
389             # Read branch/result args, which are not counted in the Z-code
390             # argument count bits (VAR/1OP etc.).
391 5107 100       19477 if ($key eq "?(label)") {
    100          
    100          
392             # XXX HACK! jump counts the ?(label) as an arg and
393             # reads it as a SIXTEEN-bit offset
394             # XXX Change jump's arg in @one_ops?
395 466         640 my $offset;
396 466 100       852 if ($command eq "jump") {
397 126         237 $offset = shift @operands;
398             # I *think* this doesn't happen
399 126 50       543 if ($offset =~ /\D/) {
400 0         0 die "jump opcode takes a variable offset at $PC\n";
401             }
402 126 100       412 $offset -= (1<<16) if $offset & (1<<15); # SIGNED offset
403             # negate_jump doesn't exist
404             } else {
405 340         801 my $arg = eat_byte();
406 340         956 $parsed{"negate_jump"} = ($arg & 0x80) == 0;
407 340         469 $offset = $arg & 0x3f; # offset is 0-63 OR...
408 340 100       1255 if (!($arg & 0x40)) { # 14-bit signed offset
409 96         194 $offset <<= 8;
410 96         172 $offset |= eat_byte();
411 96 100       241 $offset -= (1<<14) if $offset & (1<<13); # SIGNED offset
412             }
413             }
414             # Offset of 1 or 0 really means return
415 466 100 100     2094 if ($offset == 1 || $offset == 0) {
416 8         20 $parsed{"jump_return"} = $offset;
417 8         14 $parsed{"label"} = "";
418             } else {
419             # 4.7.2: "Address after branch data + Offset - 2"
420             # (-2 seems to apply to jump also, maybe because you read
421             # a two-byte word, then apply offset)
422 458         1470 my $destination = $PC + $offset - 2;
423             # printf("addr: %s, PC: %x, offset: %s%x, dest: %d\n",
424             # $parsed{opcode_address}, $PC, ($offset<0 && "-"),
425             # (abs$offset), $destination);
426 458         852 $parsed{"label"} = $destination;
427             }
428 466         1401 next;
429             } elsif ($key eq "(result)") {
430             # Store the raw number, which we use for call stack's store_var,
431             # as well as the variable name, like local2.
432 245         762 $parsed{"result_num"} = eat_byte();
433 245         522 $parsed{"result"} = num_to_var($parsed{"result_num"});
434 245         1061 next;
435             } elsif ($key eq "(literal_string)") {
436             # Make this just a print_addr
437 366         672 $parsed{literal_string} = $PC;
438             # For debugging purposes, get the string to print
439 366         775 my $q = decode_text(); $q =~ s/\n/^/g;
  366         1079  
440 366         842 $parsed{print_string} = $q;
441             }
442            
443             # At this point, we've theoretically read all possible args.
444             # So if @operands is empty, there's an optional arg that wasn't given
445 4396 100       12722 next unless @operands;
446            
447             # Now handle all the other arg types
448 3985 100       18538 if ($key =~ /arg[s1]/) { # call_* has 'args', call_2* has 'arg1'
    100          
    100          
449             # args are already sitting in operands
450 724         2542 $parsed{"args"} = \@operands;
451             } elsif ($key eq "routine") {
452 749         2713 $parsed{$key} = shift @operands;
453             } elsif ($key eq "(variable)") {
454             # Spec: "passed by reference"
455 812         1900 $parsed{"variable"} = num_to_var(shift @operands);
456             } else {
457 1700         7071 $parsed{$key} = shift @operands;
458             }
459             }
460            
461             # Calls need to store the address of the command AFTER the call,
462             # which is where the Z-machine resumes after finishing the call.
463             # (For saves, quetzal stores the byte of the store variable in the @save)
464 2959 100       11550 if ($command =~ /^call/) { $parsed{"next_pc"} = $PC }
  749 100       2349  
465 2         6 elsif ($command eq "save") { $parsed{"restore_pc"} = $PC-1 }
466            
467 2959         4068 if (0) { #$write_opcodes) {
468             #warn sprintf "addr:%s type:%s opcode:%02x (%s) operands:%s\n",
469             #$TYPE_LABELS[$op_style],
470             print((map {"$_=$parsed{$_} "} keys %parsed), "\n");
471             }
472            
473 2959         54756 return %parsed;
474             # async interpreter call (v4+), not implemented
475             # elsif ($op_style == OP_1OP && $opcode == 0x0b) {
476             # my $result = StoryFile::ret($operands[0]); }
477             }
478            
479             } # Extra scoping brace around parse_command init stuff
480            
481             # Read one operand of the given type, or
482             # return undef if given an argument of OP_TYPE_DONE
483             sub load_operand {
484 5808     5808 0 23207 my $op_type = shift;
485             # My kingdom for a switch!
486 5808 100       19920 if ($op_type == OP_TYPE_VAR) {
    100          
    100          
    50          
487 942         2289 return num_to_var(eat_byte());
488             } elsif ($op_type == OP_TYPE_SMALL) {
489 2662         6683 return eat_byte();
490             } elsif ($op_type == OP_TYPE_LARGE) {
491 1471         3196 return eat_word();
492             } elsif ($op_type == OP_TYPE_DONE) {
493 733         2298 return undef;
494             } else {
495 0         0 die "Unknown arg '$op_type' to load_operands" ;
496             }
497             }
498            
499             # Read a byte and move the Program Counter forward
500             sub eat_byte {
501 8592     8592 0 29844 return $Language::Zcode::Util::Memory[$PC++];
502             }
503            
504             # Read a word and move the Program Counter forward
505             sub eat_word {
506 2865     2865 0 5095 my $word = $Language::Zcode::Util::Memory[$PC++] << 8;
507 2865         4760 $word += $Language::Zcode::Util::Memory[$PC++];
508 2865         8265 return $word;
509             }
510            
511             sub num_to_var {
512 1999     1999 0 3155 my $num = shift;
513 1999 100 66     14933 if ($num =~ /^(sp|local\d+|g[a-f\d]{2})$/) {
    100 33        
    100          
    50          
514             # e.g., load sp (load, store, etc. pass by reference)
515             # Can't dereference until runtime.
516 66         467 return "[$1]";
517             } elsif ($num == 0) {
518 160         538 return "sp";
519             } elsif ($num >=1 && $num <=15) {
520 1440         26632 return "local" . ($num-1);
521             } elsif ($num >= 16 && $num <= 255) {
522 333         2886 return "g" . sprintf("%02x", $num - 16);
523             } else {
524 0         0 die "Illegal value '$num' passed to num_to_var";
525             }
526             }
527            
528             # XXX TODO move this (back) to Language::Zcode::Util?
529             #
530             # decode and return text at this address; see spec section 3
531             # These are entries 6-32 in the 3 ZSCII alphabets
532             # XXX Differences for versions 1,2
533             sub decode_text {
534 366     366 0 519 my $buffer = "";
535             # XXX Versions 5+ may have different alphabet table.
536 366         18488 my @alpha_table = (
537             [ 'a' .. 'z' ],
538             [ 'A' .. 'Z' ],
539             # char 6 means 10-bit ZSCII follows
540             [ undef, split//,qq{\n0123456789.,!?_#'"/\\-:()} ]
541             );
542            
543 366         1544 my ($word, $zshift, $zchar);
544 366         477 my $alphabet = 0;
545 366         525 my $abbreviation = 0;
546 366         451 my $two_bit_code = 0;
547 366         588 my $two_bit_flag = 0;
548             # XXX HACK!
549 366         479 my $flen = @Language::Zcode::Util::Memory;
550            
551 366         422 while (1) {
552 1376 50       2882 last if $PC >= $flen;
553 1376         22817 $word = eat_word();
554             # spec 3.2
555 1376         4296 for ($zshift = 10; $zshift >= 0; $zshift -= 5) {
556             # break word into 3 zcharacters of 5 bytes each
557 4128         6212 $zchar = ($word >> $zshift) & 0x1f;
558 4128 100       19046 if ($two_bit_flag > 0) {
    50          
    100          
559             # Ten-bit ZSCII character. spec 3.4
560 152 100       396 if ($two_bit_flag++ == 1) { # middle of char
561 76         185 $two_bit_code = $zchar << 5; # first 5 bits
562             } else { # end of char
563 76         115 $two_bit_code |= $zchar; # last 5
564 76         160 $buffer .= chr($two_bit_code);
565 76         212 $two_bit_code = $two_bit_flag = 0;
566             }
567             } elsif ($abbreviation) {
568             # synonym/abbreviation; spec 3.3
569 0         0 my $entry = (32 * ($abbreviation - 1)) + $zchar;
570             # Spec 3.3, 1.2.2: fetch and convert the "word PC" of the
571             # given entry in the abbreviations table.
572             # "word address"; only used for abbreviations (packed address
573             # rules do not apply here)
574             # my $abbrev_addr =
575             # $Language::Zcode::Util::Constants{abbrev_table_address} +
576             # $entry * 2;
577             #my $addr = Language::Zcode::Util::get_word_at($abbrev_addr) * 2;
578             #my $expanded = decode_text($addr);
579 0         0 $buffer .= "[abbrev $entry]";
580             #print STDERR "abbrev $abbreviation $expanded\n";
581 0         0 $abbreviation = 0;
582             } elsif ($zchar < 6) {
583 1310 100 0     3501 if ($zchar == 0) {
    100          
    50          
    0          
584 422         1485 $buffer .= " ";
585             } elsif ($zchar == 4) {
586             # spec 3.2.3: shift character; alphabet 1
587 106         378 $alphabet = 1;
588             } elsif ($zchar == 5) {
589             # spec 3.2.3: shift character; alphabet 2
590 782         2346 $alphabet = 2;
591             } elsif ($zchar >= 1 && $zchar <= 3) {
592             # spec 3.3: next zchar is an abbreviation code
593 0         0 $abbreviation = $zchar;
594             }
595             } else {
596             # spec 3.5: convert remaining chars from alpha table
597 2666         3686 $zchar -= 6;
598             # convert to string index
599 2666 100       5961 if ($alphabet != 2) {
600 2228         5910 $buffer .= $alpha_table[$alphabet]->[$zchar];
601             } else {
602             # alphabet 2; some special cases (3.5.3)
603 438 100       1574 if ($zchar == 0) {
    100          
604 76         131 $two_bit_flag = 1;
605             } elsif ($zchar == 1) {
606             # Why did rezrov do this? -ADK
607             #$buffer .= chr(Games::Rezrov::ZConst::Z_NEWLINE());
608 118         570 $buffer .= "\n";
609             } else {
610 244         646 $buffer .= $alpha_table[$alphabet]->[$zchar];
611             }
612             }
613 2666         9073 $alphabet = 0; # turn "Shift" off
614             # XXX applies to this character for version > 2 (3.2.3)
615             }
616             }
617             # Last bit set
618 1376 100       3954 last if $word & 0x8000;
619             }
620 366         5030 return $buffer;
621             }
622            
623             1;
624