File Coverage

blib/lib/CPU/Z80/Disassembler.pm
Criterion Covered Total %
statement 415 453 91.6
branch 195 234 83.3
condition 46 52 88.4
subroutine 55 58 94.8
pod 21 21 100.0
total 732 818 89.4


line stmt bran cond sub pod time code
1             package CPU::Z80::Disassembler;
2              
3             #------------------------------------------------------------------------------
4              
5             =head1 NAME
6              
7             CPU::Z80::Disassembler - Disassemble the flow of a Z80 program
8              
9             =cut
10              
11             #------------------------------------------------------------------------------
12              
13 6     6   1014104 use strict;
  6         40  
  6         182  
14 6     6   34 use warnings;
  6         12  
  6         161  
15              
16 6     6   33 use Carp; our @CARP_NOT; # do not report errors in this package
  6         21  
  6         453  
17              
18 6     6   2840 use CPU::Z80::Disassembler::Memory;
  6         19  
  6         46  
19 6     6   3140 use CPU::Z80::Disassembler::Instruction;
  6         19  
  6         40  
20 6     6   251 use CPU::Z80::Disassembler::Format;
  6         11  
  6         398  
21 6     6   3510 use CPU::Z80::Disassembler::Labels;
  6         18  
  6         58  
22              
23 6     6   2996 use Path::Tiny;
  6         39221  
  6         532  
24              
25             our $VERSION = '1.01';
26              
27             #------------------------------------------------------------------------------
28              
29             =head1 SYNOPSIS
30              
31             use CPU::Z80::Disassembler;
32             $dis = CPU::Z80::Disassembler->new;
33             $dis->memory->load_file($file_name, $addr, $opt_skip_bytes, $opt_length);
34             $dis->write_dump; $dis->write_dump($file);
35             $dis->analyse;
36             $dis->write_asm; $dis->write_asm($file);
37              
38             $dis->get_type($addr);
39             $dis->set_type_code($addr [,$count]);
40             $dis->set_type_byte($addr [,$count]);
41             $dis->set_type_word($addr [,$count]);
42              
43             $dis->set_call($addr, 1); # this may be called
44             $dis->set_call($addr, $sub); # @next_code = $sub->($self, $next_addr) will be called
45              
46             $dis->code($addr [, $label]);
47             $dis->defb($addr [, $count][, $label]);
48             $dis->defw($addr [, $count][, $label]);
49             $dis->defm($addr, $size [, $label]);
50             $dis->defmz($addr [, $count][, $label]);
51             $dis->defm7($addr [, $count][, $label]);
52              
53             $dis->block_comment($addr, $block_comment);
54             $dis->line_comments($addr, @line_comments);
55              
56             $dis->relative_arg($addr, $label_name);
57             $dis->ix_base($addr);
58             $dis->iy_base($addr);
59            
60             $dis->create_control_file($ctl_file, $bin_file, $addr, $arch);
61             $dis->load_control_file($ctl_file);
62              
63             =head1 DESCRIPTION
64              
65             Implements a Z80 disassembler. Loads a binary file into memory and dumps
66             an unprocessed disassembly listing (see C).
67              
68             Alternatively there are functions to tell the disassembler where there are
69             data bytes and what are code entry points and labels. The disassembler will
70             follow the code by simulating a Z80 processor, to find out where the code region
71             finishes.
72              
73             As a C instruction may be followed by data, the disassembler tries to find
74             out if the called routine manipulates the return stack. If it does not, and ends
75             with a C, then the routine is considered safe, and the disassembly continues
76             after the C instruction. If the routine is not considered safe, a message is
77             written at the end of the disassembled file asking the used to check the
78             routines manually; the C method should then be used to tell how to
79             handle calls to that routine on the next iteration.
80              
81             The C function can be called just before dumping the output to try to find
82             higher level constructs in the assembly listing. For example, it transforms the
83             sequence C into C.
84              
85             The C dumps an assembly listing that can be re-assembled to obtain the
86             starting binary file. All the unknown region bytes are disassembled as C
87             instructions, and a map is shown at the end of the file with the code regions (C),
88             byte regions (C), word regions (C) and unknown regions (C<->).
89              
90             =head1 FUNCTIONS
91              
92             =head2 new
93              
94             Creates the object.
95              
96             =head2 memory
97              
98             L object
99             containing the memory being analysed.
100              
101             =head2 instr
102              
103             Reference to an array that contains all the disassembled instructions
104             as L
105             objects, indexed
106             by the address of the instruction. The entry is C if there is no
107             disassembled instruction at that address (either not known, or pointing to the second,
108             etc, bytes of a multi-byte instruction).
109              
110             =head2 labels
111              
112             Returns the L
113             object that contains all the defined labels.
114              
115             =head2 header, footer
116              
117             Attributes containing blocks of text to dump before and after the assembly listing.
118             They are used by C.
119              
120             =head2 ix_base, iy_base
121              
122             Base addess for (IX+DIS) and (IY+DIS) instructions, if constant in all the code.
123             Causes the disassembly to dump:
124              
125             IY0 equ 0xHHHH ; 0xHHHH is iy_base
126             ...
127             ld a,(iy+0xHHHH-IY0) ; 0xHHHH is the absolute address
128              
129             =cut
130              
131             #------------------------------------------------------------------------------
132             # Hold a disassembly session
133 6     6   47 use base 'Class::Accessor';
  6         19  
  6         723  
134             __PACKAGE__->mk_accessors(
135             'memory', # memory to disassemble
136             '_type', # identified type of each memory address, TYPE_xxx
137             'instr', # array of Instruction objects at each address
138             'labels', # all defined labels
139             '_call_instr', # hash of all call instructions where we are blocked
140             '_can_call', # hash of all subroutines we may call:
141             # 1 : can be called, no stack impact
142             # 0 : has stack impact, needs to be checked manually
143             # sub {} : call sub->($self, $next_addr) to handle
144             # stack impact and return next code addresses
145             # to continue disassembly after call
146             '_block_comments',
147             # array of block comment string at each address, printed before
148             # the address
149             'header', 'footer',
150             # header and footer sections of disassembled file
151             'ix_base', 'iy_base',
152             # base addess for (IX+DIS) and (IY+DIS)
153             );
154              
155 6     6   45 use constant TYPE_UNKNOWN => '-';
  6         15  
  6         338  
156 6     6   49 use constant TYPE_CODE => 'C';
  6         14  
  6         269  
157 6     6   41 use constant TYPE_BYTE => 'B';
  6         18  
  6         271  
158 6     6   39 use constant TYPE_WORD => 'W';
  6         21  
  6         425  
159             my $TYPES_RE = qr/^[-CBW]$/;
160              
161 6     6   40 use Exporter 'import';
  6         21  
  6         6500  
162             our @EXPORT = qw( TYPE_UNKNOWN TYPE_CODE TYPE_BYTE TYPE_WORD );
163              
164              
165             sub new {
166 18     18 1 11435783 my($class) = @_;
167 18         186 my $memory = CPU::Z80::Disassembler::Memory->new;
168 18         217 my $type = CPU::Z80::Disassembler::Memory->new;
169 18         225 my $labels = CPU::Z80::Disassembler::Labels->new;
170 18         2334 return bless { memory => $memory,
171             _type => $type,
172             instr => [],
173             labels => $labels,
174             _call_instr => {},
175             _can_call => {},
176             _block_comments => [],
177             }, $class;
178             }
179             #------------------------------------------------------------------------------
180              
181             =head2 write_dump
182              
183             Outputs a disassembly dump on the given file, or standard output if no file
184             provided.
185              
186             The disassembly dump shows the address and bytes of each instruction with
187             the disassembled instruction.
188              
189             =cut
190              
191             #------------------------------------------------------------------------------
192              
193             sub write_dump {
194 9     9 1 5810 my($self, $file) = @_;
195              
196 9         41 my $fh = _opt_output_fh($file);
197            
198 9         49 my $it = $self->memory->loaded_iter;
199 9         22 my $instr;
200            
201 9         28 while (my($min, $max) = $it->()) {
202 7         27 for (my $addr = $min; $addr <= $max; $addr = $instr->next_addr) {
203             # either a Z80 instruction, or, if not found, a defb
204 42000   66     811460 $instr = CPU::Z80::Disassembler::Instruction->disassemble(
205             $self->memory, $addr)
206             || CPU::Z80::Disassembler::Instruction->defb(
207             $self->memory, $addr);
208 42000         119614 print $fh $instr->dump;
209             }
210             }
211             }
212              
213             #------------------------------------------------------------------------------
214              
215             =head2 analyse
216              
217             Analyse the disassembled information looking for higher level constructs.
218             For example, it replaces 'ld c,(hl):inc hl' by 'ldi c,(hl)'.
219              
220             Should be called immediately before C.
221              
222             =cut
223              
224             #------------------------------------------------------------------------------
225             sub analyse {
226 1     1 1 33 my($self) = @_;
227            
228             # search for composed instructions
229 1         6 my $it = $self->memory->loaded_iter;
230 1         7 my $limit_addr = $self->_limit_addr(0);
231 1         5 while (my($min, $max) = $it->()) {
232 1         6 for (my $addr = $min; $addr <= $max; ) {
233 8954         94913 my $instr = $self->instr->[$addr];
234 8954 50       86074 if (defined $instr) {
235 8954 100       18482 if ($instr->is_code) {
236            
237             # get address of next label
238 6598 100       70055 if ($addr >= $limit_addr) {
239 1036         2326 $limit_addr = $self->_limit_addr($addr + 1);
240             }
241            
242             # disassemble long instruction
243 6598         13820 my $long_instr = CPU::Z80::Disassembler::Instruction
244             ->disassemble($self->memory,
245             $addr, $limit_addr);
246 6598 100       15420 if ($instr->opcode ne $long_instr->opcode) {
247 193         3773 $instr = $self->_merge_instr($long_instr);
248             }
249             }
250 8954         148820 $addr += $instr->size; # both code and data
251             }
252             else {
253 0         0 $addr++; # undefined
254             }
255             }
256             }
257             }
258              
259             sub _merge_instr {
260 193     193   372 my($self, $new_instr) = @_;
261            
262 193         270 my @comments;
263 193 50       491 push @comments, $new_instr->comment if defined $new_instr->comment;
264 193         1989 for my $addr ($new_instr->addr .. $new_instr->next_addr - 1) {
265 511         6095 my $old_instr = $self->instr->[$addr];
266 511 100       5016 if ($old_instr) {
267             # copy comments
268 406 100       766 push @comments, $old_instr->comment if defined $old_instr->comment;
269            
270             # copy formats
271 406 50       7031 if (defined $old_instr->_format) {
272 0         0 for my $key (keys %{$old_instr->_format}) {
  0         0  
273             $new_instr->format->{$key} =
274 0         0 $old_instr->format->{$key};
275             }
276             }
277            
278             # delete old
279 406         3971 $self->instr->[$addr] = undef;
280             }
281             }
282 193 100       2954 $new_instr->comment(join("\n", @comments)) if @comments;
283 193         1764 $self->instr->[$new_instr->addr] = $new_instr;
284            
285 193         3656 return $new_instr;
286             }
287              
288             sub _limit_addr {
289 1037     1037   1947 my($self, $addr) = @_;
290 1037         2092 my $label = $self->labels->next_label($addr);
291 1037 50       12728 my $limit_addr = (defined $label) ? $label->addr : 0x10000;
292 1037         11983 return $limit_addr;
293             }
294              
295             #------------------------------------------------------------------------------
296              
297             =head2 write_asm
298              
299             Outputs a disassembly listing on the given file, or standard output if no file
300             provided.
301              
302             The disassembly listing can be assembled to obtain the original binary file.
303              
304             =cut
305              
306             #------------------------------------------------------------------------------
307             sub write_asm {
308 9     9 1 2053 my($self, $file) = @_;
309              
310 9         47 my $fh = _opt_output_fh($file);
311              
312 9         95 $self->_write_header($fh);
313            
314 9         48 my $comment_it = $self->_block_comments_iter;
315 9         52 my $it = $self->memory->loaded_iter;
316 9         45 while (my($min, $max) = $it->()) {
317 7         42 my $instr = CPU::Z80::Disassembler::Instruction
318             ->org($self->memory, $min);
319 7         56 print $fh $instr->asm;
320            
321 7         220 for (my $addr = $min; $addr <= $max; ) {
322             # block comments
323 15057         284307 print $fh $comment_it->($addr);
324            
325 15057         33985 $addr = $self->_write_instr($fh, $addr, $max);
326             }
327            
328 7         133 print $fh "\n";
329             }
330            
331             # final comments
332 9         60 print $fh $comment_it->();
333            
334 9 100       76 print $fh $self->footer if defined $self->footer;
335              
336 9         173 $self->_write_map($fh);
337 9         62 $self->_write_labels($fh);
338 9         199 $self->_write_check_calls($fh);
339             }
340              
341             #------------------------------------------------------------------------------
342             # iterator to return block comments up to given address
343             sub _block_comments_iter {
344 9     9   31 my($self) = @_;
345 9         20 my $i = 0;
346             return sub {
347 15066     15066   24841 my($addr) = @_;
348 15066         20356 my $max = $#{$self->_block_comments};
  15066         33218  
349 15066 100       146329 $addr = $max unless defined $addr;
350            
351 15066         23501 my $return = "";
352 15066   100     47701 while ($i <= $addr && $i <= $max) {
353 24562         45812 my $comment = $self->_block_comments->[$i++];
354 24562 100       264055 $return .= $comment if defined $comment;
355             }
356 15066         32178 $return;
357 9         87 };
358             }
359              
360             #------------------------------------------------------------------------------
361 6     6   53 use constant BPL => 16;
  6         14  
  6         29837  
362              
363             #------------------------------------------------------------------------------
364             # write the file header and the label equates
365             sub _write_header {
366 9     9   35 my($self, $fh) = @_;
367            
368 9         51 my $label_width = $self->labels->max_length + 1;
369            
370 9 100       316 print $fh $self->header if defined $self->header;
371            
372 9         215 my @labels = sort { $a->addr <=> $b->addr } $self->labels->search_all;
  14990         233408  
373 9         507 for my $label (@labels) {
374 2031 100       33055 next if defined $self->instr->[$label->addr]; # no need for EQU
375 120         2156 print $fh $label->equ_string($label_width);
376             }
377 9 100       172 print $fh "\n" if @labels;
378            
379             # create IX0 / IY0 base
380 9         28 my $printed_base;
381 9         58 for (['IX0', 'ix_base'], ['IY0', 'iy_base']) {
382 18         53 my($base, $func) = @$_;
383 18         93 my $addr = $self->$func;
384 18 100       219 if (defined $addr) {
385 2         14 my $label = $self->labels->search_addr($addr);
386 2 50       38 if (defined $label) {
387 2         8 $addr = $label->name;
388             }
389             else {
390 0         0 $addr = format_hex4($addr);
391             }
392            
393 2         38 print $fh sprintf("%-*s equ %s\n", $label_width-1, $base, $addr);
394 2         10 $printed_base++;
395             }
396             }
397              
398 9 100       122 print $fh "\n" if $printed_base;
399             }
400            
401             #------------------------------------------------------------------------------
402             # write one instruction
403             sub _write_instr {
404 15057     15057   27208 my($self, $fh, $addr, $max) = @_;
405            
406             # label
407 15057         31439 my $label = $self->labels->search_addr($addr);
408 15057 100       155459 print $fh "\n", $label->label_string if (defined $label);
409              
410 15057         32176 my $instr = $self->instr->[$addr];
411 15057 100       149445 if (defined $instr) {
412             # instruction
413 15018 100 100     32601 if (defined($instr->NN) && !defined($instr->format->{NN})) {
    100 66        
414             # nac the special case of 16-bit (defw) values which can
415             # nac potentially be converted to a label
416 1172 100       12754 if (ref($instr->NN)) {
417 215         2075 my $max = scalar(@{$instr->NN});
  215         420  
418 215         2199 for (my $i=0; $i<$max; $i++) {
419 219         502 my $NN = $instr->NN->[$i];
420 219         2144 my $ref_label = $self->labels->search_addr($NN);
421 219 100       2503 if (defined($ref_label)) {
422 216         557 $instr->NN->[$i] = $ref_label->name;
423             $instr->format->{NN} =
424 219     219   475 sub { my $foo=shift;
425 219 100       645 if (/^\d+$/) {return format_hex4($foo)}
  3         9  
426 216         1138 else {return $foo}
427 216         4858 };
428             }
429             }
430             }
431             else {
432 957         9223 my $NN = $instr->NN;
433 957         8809 my $ref_label = $self->labels->search_addr($NN);
434 957 100       9826 if (defined($ref_label)) {
435 646     646   3486 $instr->format->{NN} = sub { $ref_label->name };
  646         1676  
436             }
437             }
438             }
439             elsif (defined($instr->DIS) && !defined($instr->format->{DIS})) {
440 379         5011 for (['ix', 'ix_base'], ['iy', 'iy_base']) {
441 758         10459 my($reg, $func) = @$_;
442 758 100 100     1632 if ($instr->opcode =~ /$reg/ && defined(my $base = $self->$func)) {
443 292         8043 my $addr = $base + $instr->DIS;
444 292         2877 my $ref_label = $self->labels->search_addr($addr);
445 292 100       3048 if (defined $ref_label) {
446             $instr->format->{DIS} =
447 269     269   1937 sub { '+'.$ref_label->name.'-'.uc($reg).'0' };
  269         774  
448             }
449             }
450             }
451             }
452 15018         283529 print $fh $instr->asm;
453            
454 15018         231261 return $instr->next_addr;
455             }
456             else {
457             # block of defb
458              
459             # search for next defined instr
460 39         85 my $p;
461 39   100     157 for ($p = $addr; $p <= $max && ! defined($self->instr->[$p]) ; $p++) {
462             ;
463             }
464              
465 39         350086 my $comment = "unknown area ".format_hex4($addr)." to ".format_hex4($p-1);
466 39         137 print $fh "\n", " " x 8, "; Start of $comment\n";
467            
468             # print for $addr in blocks of 16
469 39         99 while ($addr < $p) {
470 2496         4483 my $max_count = $p - $addr;
471 2496         4101 my $count = BPL - ($addr % BPL); # until end of addr block
472 2496 100       5534 $count = $max_count if $count > $max_count; # until $p
473            
474 2496         6390 my $instr = CPU::Z80::Disassembler::Instruction
475             ->defb($self->memory, $addr, $count);
476 2496         7359 print $fh $instr->asm;
477 2496         41888 $addr += $count;
478             }
479              
480 39         137 print $fh " " x 8, "; End of $comment\n\n";
481            
482 39         157 return $addr;
483             }
484             }
485              
486             #------------------------------------------------------------------------------
487             sub _write_map {
488 9     9   35 my($self, $fh) = @_;
489            
490 9         41 my $it = $self->memory->loaded_iter;
491 9         44 while (my($min, $max) = $it->()) {
492 7         47 for my $addr ($min .. $max-1) {
493 65553 100 100     208488 if ($addr == $min || ($addr % 0x50) == 0) {
494 823         2325 print $fh "\n; ", format_hex4($addr), " ";
495             }
496 65553         122404 print $fh $self->get_type($addr);
497             }
498 7         65 print $fh "\n";
499             }
500             }
501              
502             #------------------------------------------------------------------------------
503             sub _write_labels {
504 9     9   39 my($self, $fh) = @_;
505            
506 9         44 my @labels = $self->labels->search_all;
507 9 100       494 return unless @labels;
508            
509 6         43 my $len = $self->labels->max_length;
510            
511 6         184 my @by_name = sort { lc($a->name) cmp lc($b->name) } @labels;
  6915         108733  
512 6         377 my @by_addr = sort { $a->addr <=> $b->addr } @labels;
  14990         233476  
513              
514 6         356 print $fh "\n; Labels\n;\n";
515 6         49 for (0 .. $#labels) {
516 2031         4487 print $fh "; ", format_hex4($by_addr[$_]->addr), " => ",
517             sprintf("%-${len}s", $by_addr[$_]->name),
518             " " x 8,
519             sprintf("%-${len}s", $by_name[$_]->name), " => ",
520             format_hex4($by_name[$_]->addr), "\n";
521             }
522             }
523              
524             #------------------------------------------------------------------------------
525             sub _write_check_calls {
526 9     9   31 my($self, $fh) = @_;
527              
528 9         29 my %unknown_calls;
529 9         25 for my $addr (keys %{$self->_can_call}) {
  9         93  
530 359 100       3750 $unknown_calls{$addr}++ unless $self->_can_call->{$addr};
531             }
532 9         136 for my $addr (keys %{$self->_call_instr}) {
  9         39  
533 10         128 my $instr = $self->_get_instr($addr);
534 10         131 $unknown_calls{$instr->NN}++;
535             }
536            
537 9 100       445 if (%unknown_calls) {
538             print $fh "\n\n; Check these calls manualy: ",
539 3         19 join(", ", sort map {format_hex4($_)} keys %unknown_calls),
  22         46  
540             "\n\n";
541             }
542             }
543              
544             #------------------------------------------------------------------------------
545             sub _opt_output_fh {
546 18     18   65 my($file) = @_;
547            
548             # open file
549 18         38 my $fh;
550 18 100       64 if (defined $file) {
551 15 50       1679 open($fh, ">", $file) or croak("write $file: $!");
552             }
553             else {
554 3         7 $fh = \*STDOUT;
555             }
556              
557 18         81 $fh;
558             }
559              
560             #------------------------------------------------------------------------------
561              
562             =head2 set_type_code, set_type_byte, set_type_word
563              
564             Sets the type of the given address. An optional count allows the definitions of
565             the type of consecutive memory locations.
566              
567             It is an error to set a type of a not-defined memory location,
568             or to redefine a type.
569              
570             =cut
571              
572             #------------------------------------------------------------------------------
573             sub _set_type {
574 17821     17821   41890 my($self, $type, $addr, $count) = @_;
575 17821   100     38980 $count ||= 1;
576            
577 17821 50       96745 croak("Invalid type $type") unless $type =~ /$TYPES_RE/;
578            
579 17821         45650 for ( ; $count > 0 ; $count--, $addr++ ) {
580 30574         183081 my $current_type = $self->get_type($addr);
581            
582 30573 100 100     80850 croak("Changing type of address ".format_hex4($addr)." from ".
      100        
583             "$current_type to $type")
584             if ($current_type ne TYPE_UNKNOWN &&
585             $type ne TYPE_UNKNOWN &&
586             $current_type ne $type);
587            
588 30572         69393 $self->_type->poke($addr, ord($type));
589             }
590             }
591 12201     12201 1 126397 sub set_type_code { shift->_set_type( TYPE_CODE, @_ ) }
592 4968     4968 1 52149 sub set_type_byte { shift->_set_type( TYPE_BYTE, @_ ) }
593 316     316 1 3475 sub set_type_word { shift->_set_type( TYPE_WORD, @_ ) }
594            
595             #------------------------------------------------------------------------------
596              
597             =head2 get_type
598              
599             Gets the type at the given address, one of TYPE_UNKNOWN, TYPE_CODE, TYPE_BYTE or
600             TYPE_WORD constants.
601              
602             It is an error to set a type of a not-defined memory location.
603              
604             =cut
605              
606             #------------------------------------------------------------------------------
607             sub get_type {
608 133057     133057 1 440565 my($self, $addr) = @_;
609            
610 133057 100       274060 croak("Getting type of unloaded memory at ".format_hex4($addr))
611             unless defined $self->memory->peek($addr);
612            
613 133056         1582642 my $current_type = $self->_type->peek($addr);
614 133056 100       1438565 $current_type = defined($current_type) ? chr($current_type) : TYPE_UNKNOWN;
615            
616 133056 50       571868 croak("Invalid type $current_type") unless $current_type =~ /$TYPES_RE/;
617            
618 133056         375823 return $current_type;
619             }
620              
621             #------------------------------------------------------------------------------
622              
623             =head2 set_call
624              
625             Declares a subroutine at the given address, either with no stack impact
626             (if 1 is passed as argument) or with a stack impact to be computed by the
627             given code reference. This function is called with $self and the address
628             after the call instruction as arguments and should return the next address(es)
629             where the code stream shall continue.
630              
631             =cut
632              
633             #------------------------------------------------------------------------------
634             sub set_call {
635 33     33 1 369 my($self, $addr, $can_call) = @_;
636 33         58 $self->_can_call->{$addr} = $can_call;
637             }
638              
639             #------------------------------------------------------------------------------
640              
641             =head2 code
642              
643             Declares the given address and all following instructions up to an unconditional
644             jump as a block of code, with an optional label.
645              
646             =cut
647              
648             #------------------------------------------------------------------------------
649             sub _get_instr {
650 157758     157758   300871 my($self, $addr) = @_;
651              
652             # read from cache or disassemble
653 157758   66     315451 $self->instr->[$addr] ||=
654             CPU::Z80::Disassembler::Instruction->disassemble($self->memory, $addr);
655             }
656            
657             sub code {
658 8593     8593 1 54090 my($self, $addr, $label) = @_;
659              
660 8593 100       19982 defined($label) and $self->labels->add($addr, $label);
661            
662 8593         17862 my @stack = ($addr); # all addresses to investigate
663            
664             # check calls
665 8593         20392 while (@stack) {
666             # follow all streams of code
667 8785         19061 while (@stack) {
668 23292         40888 my $addr = pop @stack;
669            
670             # if address is not loaded, assume a ROM entry point
671 23292 100       57628 if (!defined $self->memory->peek($addr)) {
672 2 50       37 if (!$self->labels->search_addr($addr)) {
673 0         0 my $instr = $self->labels->add($addr);
674             }
675 2         31 next;
676             }
677            
678             # skip if already checked
679 23290 100       294163 next if $self->get_type($addr) eq TYPE_CODE;
680            
681             # get instruction and mark as code
682 12198         28462 my $instr = $self->_get_instr($addr);
683 12198         37943 $self->set_type_code($addr, $instr->size);
684            
685             # create labels for branches (jump or call)
686 12198 100       153516 if ($instr->is_branch) {
687 3333         49176 my $branch_addr = $instr->NN;
688 3333         33979 my $label = $self->labels->add($branch_addr, undef, $addr);
689 3333     2510   20609 $instr->format->{NN} = sub { $label->name };
  2510         7321  
690             }
691            
692             # check call / rst addresses
693 12198 100       156972 if ($instr->is_call) {
694 1747         24331 my $call_addr = $instr->NN;
695 1747         18223 my $can_call = $self->_can_call->{$call_addr};
696 1747 100       20753 if (! defined $can_call) {
    100          
    100          
697 399         1010 $self->_call_instr->{$addr}++; # mark road block
698             }
699             elsif (ref $can_call) {
700 116         359 push @stack, $can_call->($self, $instr->next_addr);
701             # call sub to handle impact
702             }
703             elsif ($can_call) {
704 1156         3206 push @stack, $instr->next_addr; # can continue
705             }
706             }
707            
708             # continue on next addresses
709 12198         153423 push @stack, $instr->next_code;
710             }
711            
712             # check if we can unwind any blocked calls, after all paths without calls are
713             # exhausted
714 8785         20914 push @stack, $self->_check_call_instr;
715             }
716             }
717              
718             #------------------------------------------------------------------------------
719             sub _check_call_instr {
720 8785     8785   16666 my($self) = @_;
721              
722 8785         13965 my @stack;
723            
724             # check simple call instructions where we blocked
725 8785         12795 for my $addr (keys %{$self->_call_instr}) {
  8785         21539  
726 13620         65293 my $instr = $self->_get_instr($addr);
727 13620         157087 my $call_addr = $instr->NN;
728            
729 13620 100 66     129416 if ( # if any of the calls is conditional, then _can_call
730             $instr->opcode =~ /call \w+,NN/
731             ||
732             # if address after the call is CODE, then _can_call
733             $self->get_type($instr->next_addr) eq TYPE_CODE
734             ) {
735            
736             # mark for later; do not call code() directly because we are
737             # iterating over _call_instr that might be changed by code()
738 48         225 $self->_can_call->{$call_addr} = 1;
739 48         641 push @stack, $instr->next_addr; # code from here
740 48         940 delete $self->_call_instr->{$addr}; # processed
741             }
742             }
743            
744             # check remaining by following code flow
745 8785         59809 for my $addr (keys %{$self->_call_instr}) {
  8785         19647  
746 13572         63700 my $instr = $self->_get_instr($addr);
747 13572         153418 my $call_addr = $instr->NN;
748            
749             # if call flow in called subroutine
750             # does not pop return address, than _can_call
751 13572         127465 my $can_call = $self->_check_call($call_addr);
752 13572 100       36035 if (defined $can_call) {
753 336         809 $self->_can_call->{$call_addr} = $can_call;
754 336         3569 push @stack, $addr; # re-check call to call can_call
755 336         914 $self->_set_type(TYPE_UNKNOWN, $addr, $instr->size);
756             # allow recheck to happen
757 336         4413 delete $self->_call_instr->{$addr}; # processed
758             }
759             }
760            
761 8785         98660 return @stack;
762             }
763              
764             #------------------------------------------------------------------------------
765             sub _check_call {
766 13572     13572   24895 my($self, $call_addr) = @_;
767            
768 13572         20913 my %seen; # addresses we have checked
769 13572         22839 my($addr, $sp_level) = ($call_addr, 0);
770 13572         32384 my @stack = ([$addr, $sp_level]); # all addresses to investigate
771            
772             # follow code
773 13572         30707 while (@stack) {
774 133241         1902393 ($addr, $sp_level) = @{pop @stack};
  133241         254865  
775 133241 100       391865 next if $seen{$addr}++; # prevent loops
776            
777             # run into some known code
778 118415         274435 my $can_call = $self->_can_call->{$addr};
779 118415 100       1154139 if (defined $can_call) {
780 82 100       488 return $can_call if $sp_level == 0;
781             }
782              
783             # if address is not loaded, return "dont know"
784 118334 100       239250 if (!defined $self->memory->peek($addr)) {
785 1         16 return undef;
786             }
787              
788             # get the instruction
789 118333         1379609 my $instr = $self->_get_instr($addr);
790 118333         1233683 local $_ = $instr->opcode;
791            
792             # check stack impact
793 118333 100       1482369 if (/ret/) {
    100          
    100          
    50          
    50          
    100          
    100          
794 245 100       1805 return 1 if $sp_level == 0; # can call if stack empty
795             }
796             elsif (/push/) {
797 15341         26136 $sp_level += 2;
798             }
799             elsif (/pop/) {
800 2318         4248 $sp_level -= 2;
801 2318 100       5204 return 0 if $sp_level < 0; # STACK IMPACT!
802             }
803             elsif (/dec sp/) {
804 0         0 $sp_level++;
805             }
806             elsif (/inc sp/) {
807 0         0 $sp_level--;
808 0 0       0 return 0 if $sp_level < 0; # STACK IMPACT!
809             }
810             elsif (/ex \(sp\),/) {
811 26 100       103 return 0 if $sp_level < 2; # STACK IMPACT!
812             }
813             elsif (/ld sp/) {
814 1         9 return 0; # STACK IMPACT!
815             }
816            
817             # continue on next address, but dont follow calls
818 118078 100       274633 if ($instr->is_call) {
    100          
819 15391         199242 my $can_call = $self->_can_call->{$instr->NN};
820 15391 100 100     299944 if (defined($can_call) && !ref($can_call) && $can_call) {
      100        
821 2468         7128 push @stack, [$instr->next_addr, $sp_level]; # continue after call
822             }
823             }
824             elsif ($instr->is_branch) {
825 17399         221811 push @stack, [$instr->NN, $sp_level];
826             }
827            
828 118078 100       1230285 push @stack, [$instr->next_addr, $sp_level] unless $instr->is_break_flow;
829             }
830            
831 13235         173937 return undef; # don't know
832             }
833              
834             #------------------------------------------------------------------------------
835              
836             =head2 defb, defb2, defw, defm, defmz, defm7
837              
838             Declares the given address as a def* instruction
839             with an optional label.
840              
841             =cut
842              
843             #------------------------------------------------------------------------------
844             sub _def {
845 5283     5283   11093 my($self, $factory, $set_type,
846             $addr, $count, $label) = @_;
847              
848 5283 100       11126 defined($label) and $self->labels->add($addr, $label);
849            
850 5283         13487 my $instr = CPU::Z80::Disassembler::Instruction
851             ->$factory($self->memory, $addr, $count);
852 5283         16941 $self->instr->[$addr] = $instr;
853 5283         56742 $self->$set_type($addr, $instr->size);
854            
855 5283         65475 return $instr;
856             }
857              
858             sub defb {
859 4761     4761 1 32460 my($self, $addr, $count, $label) = @_;
860 4761         10446 $self->_def('defb', 'set_type_byte', $addr, $count, $label);
861             }
862              
863             sub defb2 {
864 0     0 1 0 my($self, $addr, $count, $label) = @_;
865 0         0 $self->_def('defb2', 'set_type_byte', $addr, $count, $label);
866             }
867              
868             sub defw {
869 316     316 1 978 my($self, $addr, $count, $label) = @_;
870 316         761 $self->_def('defw', 'set_type_word', $addr, $count, $label);
871             }
872              
873             sub defm {
874 78     78 1 1048 my($self, $addr, $length, $label) = @_;
875 78         158 $self->_def('defm', 'set_type_byte', $addr, $length, $label);
876             }
877              
878             sub defmz {
879 0     0 1 0 my($self, $addr, $count, $label) = @_;
880 0         0 $self->_def('defmz', 'set_type_byte', $addr, $count, $label);
881             }
882              
883             sub defm7 {
884 128     128 1 2447 my($self, $addr, $count, $label) = @_;
885 128         285 $self->_def('defm7', 'set_type_byte', $addr, $count, $label);
886             }
887              
888             #------------------------------------------------------------------------------
889              
890             =head2 block_comment
891              
892             Creates a block comment to insert before the given address.
893              
894             =cut
895              
896             #------------------------------------------------------------------------------
897             sub block_comment {
898 12525     12525 1 245135 my($self, $addr, $block_comment) = @_;
899            
900 12525 100       27088 if (defined $block_comment) {
901 6060         9279 chomp($block_comment);
902 6060   100     12440 $self->_block_comments->[$addr] ||= "";
903 6060         70717 $self->_block_comments->[$addr] .= "$block_comment\n";
904             }
905             }
906              
907             #------------------------------------------------------------------------------
908              
909             =head2 line_comments
910              
911             Appends each of the given line comments to the instrutions starting at
912             the given address, one comment per instruction.
913              
914             =cut
915              
916             #------------------------------------------------------------------------------
917             sub line_comments {
918 6747     6747 1 16150 my($self, $addr, @line_comments) = @_;
919            
920 6747         13954 for (@line_comments) {
921 6747         16795 my $instr = $self->instr->[$addr];
922 6747 100       77359 croak("Cannot set comment of unknown instruction at ".format_hex4($addr))
923             unless $instr;
924 6746   100     16499 my $old_comment = $instr->comment // "";
925 6746 100       77065 $old_comment .= "\n" if $old_comment;
926 6746         22597 $instr->comment($old_comment . $_);
927 6746         80273 $addr += $instr->size;
928             }
929             }
930              
931             #------------------------------------------------------------------------------
932              
933             =head2 relative_arg
934              
935             Shows the instruction argument (NN or N) relative to a given label name.
936             Label name can be '$' for a value relative to the instruction pointer.
937              
938             =cut
939              
940             #------------------------------------------------------------------------------
941              
942             =head2 create_control_file
943              
944             $dis->create_control_file($ctl_file, $bin_file, $addr, $arch);
945              
946             Creates a new control file for the given input binary file, starting at the given address
947             and for the given architecture.
948              
949             The address defaults to zero, and the architecture to undefined. The architecture may be
950             implemented in the future, for example to define system variable equates for the given
951             architecture.
952              
953             It is an error to overwrite a control file.
954              
955             The Control File is the input file for a disassembly run in an interactive disassembly
956             session, and the outout is the .asm. After each run, the user studies the output
957             .asm file, and includes new commands in the control file to add information to the
958             .asm file on the next run.
959              
960             This function creates a template control file that contains just the hex dump of the
961             binary file and the decoded assembly instruction at each address, e.g.
962              
963             0000 :F
964             0000 D3FD out ($FD),a
965             0002 01FF7F ld bc,$7FFF
966             0005 C3CB03 jp $03CB
967              
968             The control file commands start with a ':' and refer to the hexadecimal address at the
969             start of the line.
970              
971             Some commands operate on a range of addresses and accept the inclusive range limits separated
972             by a single '-'.
973              
974             A line starting with a blank uses the same address as the previous command.
975              
976             A semicolon starts a comment in the control file.
977              
978             0000 :; define next address as 0x0000
979             : ; at the same address 0x0000
980             0000-001F :B ; define a range address of bytes
981              
982             The dump between the address and the ':' is ignored and is helpfull as a guide while adding
983             information to the control file.
984              
985             =head2 load_control_file
986              
987             $dis->load_control_file($ctl_file);
988              
989             Load the control file created by and subsequently edited by the user
990             and create a new .asm disassembly file.
991              
992             =head1 Control File commands
993              
994             =head2 Include
995              
996             Include another control file at the current location.
997              
998             #include vars.ctl
999              
1000             =head2 File
1001              
1002             Load a binary file at the given address.
1003              
1004             0000 :F zx81.rom
1005              
1006             =head2 Code
1007              
1008             Define the start of a code routine, with an optional label. The code is not known to be
1009             stack-safe, i.e. not to have data bytes following the call instruction. The disassembler
1010             stops disassembly when it cannot determine if the bytes after a call instruction are
1011             data or code.
1012              
1013             0000 :C START
1014              
1015             =head2 Procedure
1016              
1017             Define the start of a procedure with a possible list of arguments following the call
1018             instruction.
1019              
1020             The signature is a list of {'B','W','C'}+, identifing each of the following items
1021             after the call instruction (Byte, Word or Code). In the following example the call
1022             istruction is followed by one byte and one word, and the procedure returns
1023             to the address after the word.
1024              
1025             0000 P proc B,W,C
1026              
1027             The signature defaults to a single 'C', meaning the procedure returns to the point after call.
1028              
1029             A signature without a 'C' means that the call never returns.
1030              
1031             =head2 Bytes and Words
1032              
1033             Define data bytes and words in the given address range.
1034              
1035             0000-0003 :B label
1036             0000-0003 :B label
1037             0000-0003 :B2[1] label ; one byte per line, binary data
1038             0000-0003 :W label
1039              
1040             =head2 Define a symbol
1041              
1042             Define the name of a symbol.
1043              
1044             4000 := ERR_NO comment\nline 2 of comment
1045              
1046             =head2 IX and IY base
1047              
1048             Define base address for IX and IY indexed mode.
1049              
1050             4000 :IX
1051             4000 :IY
1052              
1053             =head2 Header block
1054              
1055             Define a text block to be output before the given address. The block is inserted vervbatin,
1056             so include ';' if a comment is intended.
1057              
1058             0000 :# ; header
1059             :# ; continuation
1060             :# abc EQU 23
1061              
1062             =head2 Line comment
1063              
1064             Define a line comment to show at the given address.
1065              
1066             0000 :; comment
1067              
1068             =head2 Header and Footer
1069              
1070             Define a text block to be output at the top and the bottom of the assembly file.
1071             The block is inserted vervbatin, so include ';' if a comment is intended.
1072              
1073             0000 :< ; header
1074             :< ; continuation
1075             :> ; footer
1076              
1077             =cut
1078              
1079             #------------------------------------------------------------------------------
1080              
1081             sub _find_file {
1082 5     5   184 my($self, $from_file, $include_file) = @_;
1083            
1084 5 100       129 return $include_file if -f $include_file;
1085            
1086             # test relative to parent
1087 1         16 my $relative = path(path($from_file)->parent, path($include_file)->basename);
1088 1 50       267 return $relative if -f $relative;
1089            
1090 0         0 return $from_file;
1091             }
1092              
1093             #------------------------------------------------------------------------------
1094              
1095             sub create_control_file {
1096 5     5 1 27670792 my($class, $ctl_file, $bin_file, $addr, $arch) = @_;
1097            
1098 5 100       114 -f $ctl_file and die "Error: $ctl_file exists\n";
1099            
1100 4         42 my $dis = $class->new;
1101 4         30 $dis->memory->load_file($bin_file, $addr);
1102 4         85 $dis->write_dump($ctl_file);
1103 4         70 my @lines = ( <
1104             ;------------------------------------------------------------------------------
1105             ; CPU::Z80::Disassembler control file
1106             ;------------------------------------------------------------------------------
1107              
1108             END
1109             sprintf("%04X :F $bin_file\n\n", $addr),
1110             path($ctl_file)->lines
1111             );
1112 4         20106 path($ctl_file)->spew(@lines);
1113             }
1114              
1115             #------------------------------------------------------------------------------
1116              
1117             sub load_control_file {
1118 5     5 1 445 my($self, $file) = @_;
1119            
1120 5         19 my $addr = 0; my $end_addr = 0;
  5         19  
1121 5 50       225 open(my $fh, $file) or die "cannot open $file\n";
1122 5         268 while (<$fh>) {
1123 41525         190026 chomp;
1124 41525         84581 s/^\s*;.*$//; # remove comments
1125 41525         93724 s/\s+$//;
1126 41525 100       110068 next unless /\S/;
1127              
1128 36285 100       72985 if (/^ \#include \s+ (\S+) /ix) {
1129 1         4 $self->load_control_file($self->_find_file($file, $1));
1130             }
1131             else {
1132             # decode start address
1133 36284 100       114600 if (s/^ ([0-9a-f]+) //ix) {
1134 26213         58467 $addr = hex($1);
1135             }
1136              
1137             # decode end address
1138 36284         59093 $end_addr = $addr;
1139 36284 100       72358 if (s/^ -([0-9a-f]+) //ix) {
1140 1463         2928 $end_addr = hex($1);
1141             }
1142              
1143             # remove all chars up to ':', ignore lines without ':'
1144 36284 100       108471 /:\s*/ or next;
1145 20549         51765 $_ = $';
1146 20549 50       49468 next unless /\S/;
1147            
1148             # decode command
1149 20549         33387 my($include_file, $label, $comment, $signature, $type);
1150            
1151             # File
1152 20549 100       139240 if (($include_file) = /^ F \s+ (\S+) /ix) {
    100          
    100          
    100          
    100          
    50          
    100          
    50          
    100          
    50          
    50          
1153 4         29 $self->memory->load_file($self->_find_file($file, $include_file), $addr);
1154             }
1155            
1156             # Code
1157             elsif (($label) = /^ C \s* (\w+)? /ix) {
1158 7506         19974 $self->code($addr, $label);
1159             }
1160            
1161             # Define label
1162             elsif (($label, $comment) = /^ = \s+ (\S+) \s* ;? \s*(.*)/ix) {
1163 40         80 $comment =~ s/ \\ n /\n/gx;
1164 40         107 my $instr = $self->labels->add($addr, $label);
1165 40 50       110 $instr->comment($comment) if $comment;
1166             }
1167            
1168             # Block comment
1169             elsif (($comment) = /^ \# \s? (.*)/ix) {
1170 3270         7736 $self->block_comment($addr, $comment);
1171             }
1172            
1173             # Header
1174             elsif (($comment) = /^ \< \s? (.*)/ix) {
1175 56   100     155 my $header = $self->header // "";
1176 56 100       678 $header .= "\n" if $header;
1177 56         232 $self->header($header.$comment);
1178             }
1179            
1180             # Footer
1181             elsif (($comment) = /^ \> \s? (.*)/ix) {
1182 0   0     0 my $footer = $self->footer // "";
1183 0 0       0 $footer .= "\n" if $footer;
1184 0         0 $self->footer($footer.$comment);
1185             }
1186            
1187             # Line comment
1188             elsif (($comment) = /^ \; [\s;]* (.*)/ix) {
1189 6746         17463 $self->line_comments($addr, $comment);
1190             }
1191            
1192             # Procedure
1193             elsif (($label, $signature) = /^ P \s+ (\w+) \s* (.*)/ix) {
1194 0         0 $self->code($addr, $label);
1195 0         0 $signature =~ s/,/ /g;
1196 0         0 my @types = split(' ', $signature);
1197 0 0       0 @types = ('C') if !@types;
1198             $self->set_call($addr, sub {
1199 0     0   0 my($self, $addr) = @_;
1200 0         0 for (@types) {
1201 0 0       0 if ($_ eq 'B') {
    0          
    0          
1202 0         0 $self->defb($addr);
1203 0         0 $addr++
1204             }
1205             elsif ($_ eq 'W') {
1206 0         0 $self->defW($addr);
1207 0         0 $addr += 2;
1208             }
1209             elsif ($_ eq 'C') {
1210 0         0 return $addr;
1211             }
1212             else {
1213 0         0 die "procedure argument type $_ unknown";
1214             }
1215             }
1216 0         0 return;
1217 0         0 });
1218             }
1219            
1220             # Byte | Word
1221             elsif (my($type, $ipl, $label) = /^ (B2 | B | W | M) (?: \[ (\d+) \] )? \s* (\w+)?/ix) {
1222 2926 100       7215 $self->labels->add($addr, $label) if defined $label;
1223 2926 50       6002 $ipl = 16 unless $ipl;
1224              
1225 2926         4908 my($func, $size);
1226 2926 100       5981 if ($type eq 'B') { ($func, $size) = ('defb', 1); }
  2724 50       5335  
    50          
    0          
1227 0         0 elsif ($type eq 'B2') { ($func, $size) = ('defb2', 1); }
1228 202         407 elsif ($type eq 'W') { ($func, $size) = ('defw', 2); }
1229 0         0 elsif ($type eq 'M') { ($func, $size) = ('defm', 1); $ipl = 32; }
  0         0  
1230 0         0 else { die "type $type unknown"; }
1231            
1232 2926 100 100     7086 if ($size == 2 && $addr == $end_addr) {
1233 101         167 $end_addr++; # a word uses two addresses
1234             }
1235            
1236 2926         6503 for (my $a = $addr; $a <= $end_addr; ) {
1237 2926         7187 my $items = int(($end_addr - $a + 1) / $size);
1238 2926 50       5866 $items = $ipl if $items > $ipl;
1239            
1240 2926         9307 $self->$func($a, $items);
1241 2926         18088 $a += $size * $items;
1242             }
1243             }
1244            
1245             # IX
1246             elsif (/^ IX /ix) {
1247 0         0 $self->ix_base($addr);
1248             }
1249            
1250             # IY
1251             elsif (/^ IY /ix) {
1252 1         7 $self->iy_base($addr);
1253             }
1254            
1255             # undefined
1256             else {
1257 0         0 die "Load '$file': cannot parse '$_'";
1258             }
1259             }
1260             }
1261             }
1262            
1263             #------------------------------------------------------------------------------
1264             sub relative_arg {
1265 25     25 1 420 my($self, $addr, $label_name) = @_;
1266              
1267             # disassemble from here, if needed
1268 25         76 $self->code($addr);
1269 25 50       83 my $instr = $self->_get_instr($addr) or die;
1270            
1271 25         311 my $label_addr;
1272 25 100       56 if ($label_name eq '$') {
1273 3         12 $label_addr = $instr->addr;
1274             }
1275             else {
1276 22 100       53 my $label = $self->labels->search_name($label_name)
1277             or croak("Label '$label_name' not found");
1278 21         315 $label_addr = $label->addr;
1279             }
1280            
1281 24 100       278 my $NN = defined($instr->NN) ? 'NN' :
    100          
1282             defined($instr->N ) ? 'N' :
1283             croak("Instruction at address ".format_hex4($addr).
1284             " has no arguments");
1285 23         303 my $arg = $instr->$NN;
1286 23 50       248 $arg = [$arg] unless ref $arg; # defb stores as [N]
1287            
1288 23         46 my $delta = $arg->[0] - $label_addr;
1289 23         72 my $expr = $label_name . format_dis($delta);
1290 23     23   153 $instr->format->{$NN} = sub { $expr };
  23         200  
1291             }
1292              
1293             #------------------------------------------------------------------------------
1294              
1295             =head1 ACKNOWLEDGEMENTS
1296              
1297             =head1 AUTHOR
1298              
1299             Paulo Custodio, C<< >>
1300              
1301             =head1 BUGS and FEEDBACK
1302              
1303             Please report any bugs or feature requests through
1304             the web interface at
1305             L.
1306              
1307             =head1 LICENSE AND COPYRIGHT
1308              
1309             Copyright 2010 Paulo Custodio.
1310              
1311             This program is free software; you can redistribute it and/or modify it
1312             under the terms of either: the GNU General Public License as published
1313             by the Free Software Foundation; or the Artistic License.
1314              
1315             See http://dev.perl.org/licenses/ for more information.
1316              
1317             The Spectrum 48K ROM used in the test scripts is Copyright by Amstrad.
1318             Amstrad have kindly given their permission for the
1319             redistribution of their copyrighted material but retain that copyright
1320             (see L).
1321              
1322             =cut
1323              
1324             1;