File Coverage

blib/lib/Language/Zcode/Parser/Perl.pm
Criterion Covered Total %
statement 113 124 91.1
branch 48 56 85.7
condition 18 21 85.7
subroutine 12 12 100.0
pod 1 5 20.0
total 192 218 88.0


line stmt bran cond sub pod time code
1             package Language::Zcode::Parser::Perl;
2            
3 2     2   11 use strict;
  2         4  
  2         77  
4 2     2   10 use warnings;
  2         5  
  2         83  
5 2     2   10 use base qw(Language::Zcode::Parser::Generic);
  2         4  
  2         1411  
6            
7             =head1 NAME
8            
9             Language::Zcode::Parser::Perl - Z-code parser in pure Perl
10            
11             =head1 DESCRIPTION
12            
13             =head2 Finding subroutine starts and ends
14            
15             Things we know:
16            
17             =over 4
18            
19             =item 1a
20            
21             We understand the syntax of all opcodes that are in the spec. (modulo bugs)
22            
23             =item 1b
24            
25             0 is not a legal opcode (almost every other 1-byte number is,
26             depending on version -- but see NOTES)
27            
28             =item 2a
29            
30             Subs must start at packed addresses. Bytes between subs are always zero
31             (I hope!)
32            
33             =item 2b
34            
35             Subs must start with a byte 0-15
36            
37             =item 2c
38            
39             If header byte is zero, next byte CAN'T be a zero, cuz there are
40             no locals so it has to be a command, and 0 isn't a command
41            
42             =item 2d
43            
44             Subs must be called with call* opcodes, although it is legal to call
45             a variable (like "call_2n sp 1 2")
46            
47             =item 3a
48            
49             There is no way for the program to get past a ret, rfalse (etc.) or jump
50             (backwards) command without jumping past it.
51            
52             =item 3b
53            
54             jump opcodes cannot take variable args
55            
56             =item 3c
57            
58             There may be code after a sub-ender that is not jumped into. This is a (rare,
59             but existent) orphan fragment.
60            
61             =back
62            
63             The upshot of this is that, if we propose that a sub starts at a given address,
64             we can unambiguously read (the header and) commands until we hit a sub-ender
65             that is not jumped past. If we find unexpected 0 bytes, for example, then we
66             were wrong about the sub's starting address.
67            
68             So:
69            
70             read a command. (Note if it has a sub call or a jump)
71            
72             if next byte is a known start of sub {
73             we finished this sub! Celebrate
74            
75             } else if next byte is a 0 {
76             # there must be a sub next
77             if there's more than one 0 {
78             skip to the last 0 in the series
79             again, if we get to known start of sub, we're done
80             }
81             if last 0 is on packed address {
82             start a sub here # 0 local vars, so next byte must be (non-zero) cmd
83             } else if next byte is on packed address and is 1-15 {
84             start a sub at that byte
85             } else error!
86            
87             } else if not on packed address OR next byte is not 1-15 { # must be command
88             read next command
89            
90             } else { # start doing things I'm less sure about
91             # During this less sure part, if I get a parsing error, try
92             # the other possibility
93             if previous command was a ret, rfalse etc. that we have not jumped past {
94             read sub
95             } else {
96             read command
97             }
98             }
99            
100             Also stop if we get to a known string address or end of the file. The
101             first string may be referenced in a sub we don't see, or may not be referenced
102             at all (Zork1 always call print_paddr with variables, not constant string
103             addresses.) so we'll run past the end of the last sub and into the strings.
104            
105             Arg to a call is considered the most authoritative demonstration that
106             a sub exists. 0..15 byte at a packed address is slightly less sure, especially
107             if there are no 0 bytes separating it from the previous sub (could be
108             an orphan fragment).
109            
110             =cut
111            
112 2     2   14 use constant SURE => 8;
  2         2  
  2         91  
113 2     2   12 use constant ALMOST_SURE => 4;
  2         4  
  2         79  
114 2     2   10 use constant PROBABLE => 2;
  2         2  
  2         131  
115 2     2   9 use constant MAYBE => 1;
  2         3  
  2         3242  
116            
117             # Note throughout that eat_byte gets a byte and sets PC() to be that byte's
118             # position PLUS ONE
119             # TODO get rid of @todo. Just use
120             # (grep {$prob ^ DONE} sort {$prob} keys %prob)[0]
121             # When finishing a sub, $prob |= DONE
122             # $packed, %end_codes go into main while loop (which becomes subroutine)
123             # $string_min, %prob = try_sub($string_min, %prob);
124             sub find_subs {
125             # Only sub we know of right now is (1 byte before)
126             # the start address in the header
127 2     2 0 10 my $main_sub = $Language::Zcode::Util::Constants{first_instruction_address} - 1;
128 2         12 my @todo = ($main_sub);
129 2         9 my %prob = ($main_sub => SURE);
130             # Also, try the address just after the dictionary ends
131 2         7 my $dict_end = &end_of_dictionary;
132 2 50       17 if ($dict_end != $main_sub) {
133 0         0 push @todo, $dict_end;
134 0         0 $prob{$dict_end} = MAYBE;
135             }
136             # For finding packed addresses
137 2         5 my $packed = $Language::Zcode::Util::Constants{packed_multiplier};
138 2         5 my $string_min = $Language::Zcode::Util::Constants{file_length}-1; # min addr of strings
139 2         5 my @subs;
140             # Codes that can end a routine
141 2         5 my %end_codes = map {$_ => 1}
  14         44  
142             # Note: only jump can cleanly end a sub, not je & other branch ops
143             qw(ret rfalse rtrue ret_popped print_ret jump quit);
144            
145             # We shift subs out. So if we find something we're sure is a sub,
146             # unshift it into the list. If we're not so sure, push it onto the end,
147             # so we won't look at it until after looking at subs we're sure of.
148 2         16 while (defined (my $rtn = shift @todo)) {
149 68         269 my $hr = sprintf('%x', $rtn);
150             # print "Routine $hr ($rtn): ";
151             # Read num_locals -- and read the locals, for v1-4
152 68         104 eval {&Language::Zcode::Parser::Opcode::parse_sub_header($rtn)};
  68         581  
153 68 50       143 if ($@) { warn $@; delete $prob{$rtn}; next }
  0         0  
  0         0  
  0         0  
154 68         202 my $max_PC = PC(); # we know the sub goes at least until...
155 68         90 my $last_command = $max_PC; # address of last command in the sub
156 68         188 while (1) {
157             # changes PC
158 1481         5186 my %command=&Language::Zcode::Parser::Opcode::parse_command;
159 1481 50       7668 delete $prob{$rtn}, last unless %command; # unknown opcode
160 1481         2392 $last_command = $command{opcode_address};
161 1481         3797 my $sub_ender = exists $end_codes{$command{opcode}};
162 1481         3939 my $pc = PC();
163            
164 1481 50       3468 if ($pc >= $string_min) {
165             # print "sub ends at $pc, start of strings\n";
166 0         0 $max_PC = $string_min -1;
167 0         0 last;
168             }
169            
170             # If we can branch to a point later in the code, we know the
171             # sub goes at least until there.
172             # (IRL, you can jump outside a sub, but we're ignoring that.)
173             # jz foo 0/1 will have label = "", cuz it really means "return"
174 1481 100 100     8258 if (exists $command{label} && !exists $command{jump_return}) {
175 229         1424 my $l = $command{label};
176 229 50       1066 die "Illegal to jump to a variable ($pc)!?" if $l =~ /\D/;
177 229 100       539 $max_PC = $l if $l > $max_PC;
178             }
179            
180             # For call* commands, note addresses of the subs they call
181 1481 100       3388 if (exists $command{routine}) {
182             # p_a_s will return 0/undef if it's not a "useful" call
183 375 100       2054 if (my $r = packed_address_str($command{routine}, "routine")) {
184 371 100       1411 unshift @todo, $r if !exists $prob{$r};
185 371         1474 $prob{$r} |= ALMOST_SURE; # pretty sure it's a sub
186             }
187             }
188            
189             # Find address of first string - stop parsing routines there!
190 1481 100       3476 if (exists $command{packed_address_of_string}) {
191 8         16 my $s = $command{packed_address_of_string};
192 8         18 $s = packed_address_str($s, "packed_address_of_string");
193 8 100 100     43 if (defined $s && $s < $string_min) {
194             # print "$s < $string_min - new string min\n";
195 3         6 $string_min = $s;
196             }
197             }
198            
199            
200             # Now go through a long complicated procedure to see if
201             # we've finished the sub
202            
203             # 0 byte means there must be a sub next
204             # (Note: we may change PC() in here)
205 1481 100 100     4031 if ((my $byte = &peek()) == 0) {
    100 100        
206             # byte starting next sub must be at packed address
207             # and must be (0 followed by nonzero OR 1..15)
208             # (We also know $packed is always at least 2)
209             # Skip zero or more 0's until byte AFTER me is NOT zero
210 55         160 $byte = Language::Zcode::Parser::Opcode::eat_byte()
211             until peek() != 0;
212 55         105 $pc = PC();
213 55 100       124 if ($pc >= $string_min) {
214             # print "sub followed by zeroes and first string $pc\n";
215 1         3 $max_PC = $string_min -1;
216 1         3 last;
217             }
218             # If we read 0 byte starting a sub, (numlocals = 0), back up
219 54 100 66     276 if ($pc % $packed == 1 && $byte == 0) { PC(--$pc); }
  11         70  
220            
221             # ERRORS. If we read a 0 byte, but a new sub doesn't start
222             # at the next packed address, then the *current* sub
223             # we're reading must not really be a sub!
224 54 100 66     267 if ($pc % $packed || &peek() > 15) { # 0 byte, but no new sub!
    50          
225 1         4 warn peek(), " at $pc > 15. 0 in middle of sub!\n";
226 1         6 delete $prob{$rtn};
227 1         4 last;
228             } elsif ($max_PC>$pc) {
229 0         0 warn "Max $max_PC > $pc in rtn $rtn. Jump past sub end?\n";
230 0         0 delete $prob{$rtn};
231 0         0 last;
232             }
233            
234             # Sub to try. May or may not be new. We'll "last" below
235 53         125 $prob{$pc} |= PROBABLE; # somewhat sure it's a new sub
236            
237             # If we *can* finish a sub now, but we don't KNOW there's
238             # another sub starting now, then *probably* we ended sub,
239             # but it might be an orphan code fragment
240             } elsif ($sub_ender && $max_PC < $pc && !exists $prob{$pc}) {
241 3 100 66     21 if ($byte <=15 && $pc % $packed == 0) {
242             # COULD be an orphan code fragment w/ 1..15 byte: very rare
243             # printf "ASSUME ";
244 2         7 $prob{$pc} |= MAYBE; # not entirely sure it's a new sub
245             } else {
246             # printf "Orphan code fragment: PC %x ($pc)\n",$pc
247             }
248             }
249            
250             # We know sub lasts at least until end of command we just read
251 1479 100       3843 $max_PC = $pc-1 if $pc > $max_PC;
252            
253             # Found a new sub here?
254 1479 100       5914 if (exists $prob{$pc}) {
255             # print "sub ends at ";
256             # print "start of sub " if $prob{$pc} & ALMOST_SURE;
257             # printf "%x ($pc).\n", $pc;
258             # less sure of these subs; 'push' means try them last
259 66 100       171 push @todo, $pc if $prob{$pc} < ALMOST_SURE;
260 66         169 last; # Starting a new sub, so stop reading this one
261             } # else keep reading commands
262             }
263            
264 68 100       193 if (exists $prob{$rtn}) { # bad subs have been delete()d
265 67         380 my $routine = new Language::Zcode::Parser::Routine $rtn;
266 67         251 $routine->end($max_PC);
267 67         214 $routine->last_command_address($last_command);
268 67         227 push @subs, $routine;
269             # If we made it to the end of a sub, we're pretty sure it's real
270 67         419 $prob{$rtn} |= ALMOST_SURE; # (if we weren't sure about it already)
271             }
272             }
273            
274 2         20 return sort {$a->address <=> $b->address} @subs;
  255         642  
275             }
276            
277             sub PC {
278 1628 100   1628 0 4037 $Language::Zcode::Parser::Opcode::PC = $_[0] if $_[0];
279 1628         3414 return $Language::Zcode::Parser::Opcode::PC
280             }
281 1699     1699 0 8107 sub peek { $Language::Zcode::Util::Memory[$Language::Zcode::Parser::Opcode::PC] }
282            
283             # Returns undef for situations where we don't get a true address
284             # "@call sp", where we don't know sub address, OR "call 0", which isn't a call
285             sub packed_address_str {
286 383     383 0 642 my ($address, $key) = @_;
287 383 50       760 return undef if !$address;
288 383         4273 my %c = %Language::Zcode::Util::Constants;
289 383         2190 my $mult = $c{packed_multiplier};
290 383         443 my $add;
291             # (Add will be zero for versions not 6 or 7)
292 383 100       1710 if ($key eq "routine") {
    50          
293 375         722 $add = 8 * $c{routines_offset};
294             } elsif ($key eq "packed_address_of_string") {
295 8         16 $add = 8 * $c{strings_offset};
296 0         0 } else { die "Unknown key $key to packed_address_str" }
297            
298             # Now actually create the string. Only do calculation for true number
299 383 100       1726 if ($address =~ /^\d+$/) {
300 375         2598 return $mult * $address + $add;
301             } else {
302 8         42 return undef;
303             }
304             }
305            
306             =head2 end_of_dictionary
307            
308             Find the first packed address after the end of the dictionary.
309             (This is a likely place for the lowest-address subroutine.)
310            
311             =cut
312            
313             sub end_of_dictionary {
314 2     2 1 6 my $dict = $Language::Zcode::Util::Constants{dictionary_address};
315 2         8 PC($dict);
316            
317             # get token separators
318 2         10 my $sep_count = Language::Zcode::Parser::Opcode::eat_byte();
319 2         13 Language::Zcode::Parser::Opcode::eat_byte() for 1..$sep_count;
320            
321             # number of bytes for each encoded word
322             # Spec 13.3: this includes the word itself PLUS some data
323             # number of words in the dictionary
324 2         7 my $entry_length = Language::Zcode::Parser::Opcode::eat_byte();
325 2         9 my $entry_count = Language::Zcode::Parser::Opcode::eat_word();
326             # Now skip N M-byte words -> first byte AFTER dictionary
327             # Then go to first packed address after that
328 2         13 my $word_start = PC();
329 2         5 my $dict_end = $word_start + $entry_count * $entry_length;
330             # printf "Start at $dict (%x). $entry_count $entry_length-byte words.",$dict;
331             # printf "\nEnd at $dict_end (%x)\n", $dict_end;
332 2         3 my $byte;
333 2         7 my $packed = $Language::Zcode::Util::Constants{packed_multiplier};
334 2         5 PC($dict_end);
335 2         7 $byte = Language::Zcode::Parser::Opcode::eat_byte()
336             until PC() % $packed == 0;
337 2         7 $dict_end = PC();
338             # printf "First possible sub after dict is PC $dict_end (%x)\n", $dict_end;
339 2         5 return $dict_end;
340             }
341            
342             =head1 NOTES
343            
344             Actually, the remarks on section 14 of the spec say, "The 2OP opcode 0 was
345             possibly intended for setting break-points in debugging (and may be used for
346             this again). It was not nop." So in theory my algorithm may not be right.
347             Oh well.
348            
349             =head1 TODO
350            
351             This will break if there's data interleaved between the subs.
352             See SPEC comments on section 1.
353            
354             Start at the byte after the end of the dictionary. Look at every packed
355             address that's not included in a subroutine I've already found, up until
356             we get to the strings. If I find something that looks like a sub, start
357             parsing commands as above, except with a "not sure" flag set. If we find
358             calls in that sub, follow them, but propagate the "not sure" flag.
359            
360             =cut
361            
362             1;