File Coverage

blib/lib/CPU/Z80/Disassembler.pm
Criterion Covered Total %
statement 415 453 91.6
branch 195 234 83.3
condition 46 52 88.4
subroutine 55 58 94.8
pod 21 21 100.0
total 732 818 89.4


line stmt bran cond sub pod time code
1             package CPU::Z80::Disassembler;
2              
3             #------------------------------------------------------------------------------
4              
5             =head1 NAME
6              
7             CPU::Z80::Disassembler - Disassemble the flow of a Z80 program
8              
9             =cut
10              
11             #------------------------------------------------------------------------------
12              
13 6     6   792777 use strict;
  6         33  
  6         146  
14 6     6   26 use warnings;
  6         12  
  6         124  
15              
16 6     6   24 use Carp; our @CARP_NOT; # do not report errors in this package
  6         16  
  6         362  
17              
18 6     6   2565 use CPU::Z80::Disassembler::Memory;
  6         30  
  6         30  
19 6     6   2548 use CPU::Z80::Disassembler::Instruction;
  6         15  
  6         32  
20 6     6   195 use CPU::Z80::Disassembler::Format;
  6         12  
  6         314  
21 6     6   2772 use CPU::Z80::Disassembler::Labels;
  6         14  
  6         29  
22              
23 6     6   2307 use Path::Tiny;
  6         29656  
  6         390  
24              
25             our $VERSION = '1.02';
26              
27             #------------------------------------------------------------------------------
28              
29             =head1 SYNOPSIS
30              
31             use CPU::Z80::Disassembler;
32             $dis = CPU::Z80::Disassembler->new;
33             $dis->memory->load_file($file_name, $addr, $opt_skip_bytes, $opt_length);
34             $dis->write_dump; $dis->write_dump($file);
35             $dis->analyse;
36             $dis->write_asm; $dis->write_asm($file);
37              
38             $dis->get_type($addr);
39             $dis->set_type_code($addr [,$count]);
40             $dis->set_type_byte($addr [,$count]);
41             $dis->set_type_word($addr [,$count]);
42              
43             $dis->set_call($addr, 1); # this may be called
44             $dis->set_call($addr, $sub); # @next_code = $sub->($self, $next_addr) will be called
45              
46             $dis->code($addr [, $label]);
47             $dis->defb($addr [, $count][, $label]);
48             $dis->defw($addr [, $count][, $label]);
49             $dis->defm($addr, $size [, $label]);
50             $dis->defmz($addr [, $count][, $label]);
51             $dis->defm7($addr [, $count][, $label]);
52              
53             $dis->block_comment($addr, $block_comment);
54             $dis->line_comments($addr, @line_comments);
55              
56             $dis->relative_arg($addr, $label_name);
57             $dis->ix_base($addr);
58             $dis->iy_base($addr);
59            
60             $dis->create_control_file($ctl_file, $bin_file, $addr, $arch);
61             $dis->load_control_file($ctl_file);
62              
63             =head1 DESCRIPTION
64              
65             Implements a Z80 disassembler. Loads a binary file into memory and dumps
66             an unprocessed disassembly listing (see C).
67              
68             Alternatively there are functions to tell the disassembler where there are
69             data bytes and what are code entry points and labels. The disassembler will
70             follow the code by simulating a Z80 processor, to find out where the code region
71             finishes.
72              
73             As a C instruction may be followed by data, the disassembler tries to find
74             out if the called routine manipulates the return stack. If it does not, and ends
75             with a C, then the routine is considered safe, and the disassembly continues
76             after the C instruction. If the routine is not considered safe, a message is
77             written at the end of the disassembled file asking the used to check the
78             routines manually; the C method should then be used to tell how to
79             handle calls to that routine on the next iteration.
80              
81             The C function can be called just before dumping the output to try to find
82             higher level constructs in the assembly listing. For example, it transforms the
83             sequence C into C.
84              
85             The C dumps an assembly listing that can be re-assembled to obtain the
86             starting binary file. All the unknown region bytes are disassembled as C
87             instructions, and a map is shown at the end of the file with the code regions (C),
88             byte regions (C), word regions (C) and unknown regions (C<->).
89              
90             =head1 FUNCTIONS
91              
92             =head2 new
93              
94             Creates the object.
95              
96             =head2 memory
97              
98             L object
99             containing the memory being analysed.
100              
101             =head2 instr
102              
103             Reference to an array that contains all the disassembled instructions
104             as L
105             objects, indexed
106             by the address of the instruction. The entry is C if there is no
107             disassembled instruction at that address (either not known, or pointing to the second,
108             etc, bytes of a multi-byte instruction).
109              
110             =head2 labels
111              
112             Returns the L
113             object that contains all the defined labels.
114              
115             =head2 header, footer
116              
117             Attributes containing blocks of text to dump before and after the assembly listing.
118             They are used by C.
119              
120             =head2 ix_base, iy_base
121              
122             Base addess for (IX+DIS) and (IY+DIS) instructions, if constant in all the code.
123             Causes the disassembly to dump:
124              
125             IY0 equ 0xHHHH ; 0xHHHH is iy_base
126             ...
127             ld a,(iy+0xHHHH-IY0) ; 0xHHHH is the absolute address
128              
129             =cut
130              
131             #------------------------------------------------------------------------------
132             # Hold a disassembly session
133 6     6   37 use base 'Class::Accessor';
  6         9  
  6         555  
134             __PACKAGE__->mk_accessors(
135             'memory', # memory to disassemble
136             '_type', # identified type of each memory address, TYPE_xxx
137             'instr', # array of Instruction objects at each address
138             'labels', # all defined labels
139             '_call_instr', # hash of all call instructions where we are blocked
140             '_can_call', # hash of all subroutines we may call:
141             # 1 : can be called, no stack impact
142             # 0 : has stack impact, needs to be checked manually
143             # sub {} : call sub->($self, $next_addr) to handle
144             # stack impact and return next code addresses
145             # to continue disassembly after call
146             '_block_comments',
147             # array of block comment string at each address, printed before
148             # the address
149             'header', 'footer',
150             # header and footer sections of disassembled file
151             'ix_base', 'iy_base',
152             # base addess for (IX+DIS) and (IY+DIS)
153             );
154              
155 6     6   34 use constant TYPE_UNKNOWN => '-';
  6         10  
  6         267  
156 6     6   26 use constant TYPE_CODE => 'C';
  6         11  
  6         221  
157 6     6   28 use constant TYPE_BYTE => 'B';
  6         15  
  6         206  
158 6     6   27 use constant TYPE_WORD => 'W';
  6         10  
  6         400  
159             my $TYPES_RE = qr/^[-CBW]$/;
160              
161 6     6   35 use Exporter 'import';
  6         11  
  6         5165  
162             our @EXPORT = qw( TYPE_UNKNOWN TYPE_CODE TYPE_BYTE TYPE_WORD );
163              
164              
165             sub new {
166 18     18 1 9535747 my($class) = @_;
167 18         158 my $memory = CPU::Z80::Disassembler::Memory->new;
168 18         127 my $type = CPU::Z80::Disassembler::Memory->new;
169 18         204 my $labels = CPU::Z80::Disassembler::Labels->new;
170 18         1904 return bless { memory => $memory,
171             _type => $type,
172             instr => [],
173             labels => $labels,
174             _call_instr => {},
175             _can_call => {},
176             _block_comments => [],
177             }, $class;
178             }
179             #------------------------------------------------------------------------------
180              
181             =head2 write_dump
182              
183             Outputs a disassembly dump on the given file, or standard output if no file
184             provided.
185              
186             The disassembly dump shows the address and bytes of each instruction with
187             the disassembled instruction.
188              
189             =cut
190              
191             #------------------------------------------------------------------------------
192              
193             sub write_dump {
194 9     9 1 4634 my($self, $file) = @_;
195              
196 9         32 my $fh = _opt_output_fh($file);
197            
198 9         35 my $it = $self->memory->loaded_iter;
199 9         17 my $instr;
200            
201 9         28 while (my($min, $max) = $it->()) {
202 7         29 for (my $addr = $min; $addr <= $max; $addr = $instr->next_addr) {
203             # either a Z80 instruction, or, if not found, a defb
204 42000   66     657821 $instr = CPU::Z80::Disassembler::Instruction->disassemble(
205             $self->memory, $addr)
206             || CPU::Z80::Disassembler::Instruction->defb(
207             $self->memory, $addr);
208 42000         92114 print $fh $instr->dump;
209             }
210             }
211             }
212              
213             #------------------------------------------------------------------------------
214              
215             =head2 analyse
216              
217             Analyse the disassembled information looking for higher level constructs.
218             For example, it replaces 'ld c,(hl):inc hl' by 'ldi c,(hl)'.
219              
220             Should be called immediately before C.
221              
222             =cut
223              
224             #------------------------------------------------------------------------------
225             sub analyse {
226 1     1 1 42 my($self) = @_;
227            
228             # search for composed instructions
229 1         5 my $it = $self->memory->loaded_iter;
230 1         8 my $limit_addr = $self->_limit_addr(0);
231 1         6 while (my($min, $max) = $it->()) {
232 1         5 for (my $addr = $min; $addr <= $max; ) {
233 8954         78029 my $instr = $self->instr->[$addr];
234 8954 50       71050 if (defined $instr) {
235 8954 100       15561 if ($instr->is_code) {
236            
237             # get address of next label
238 6598 100       55614 if ($addr >= $limit_addr) {
239 1036         1851 $limit_addr = $self->_limit_addr($addr + 1);
240             }
241            
242             # disassemble long instruction
243 6598         11894 my $long_instr = CPU::Z80::Disassembler::Instruction
244             ->disassemble($self->memory,
245             $addr, $limit_addr);
246 6598 100       12431 if ($instr->opcode ne $long_instr->opcode) {
247 193         2946 $instr = $self->_merge_instr($long_instr);
248             }
249             }
250 8954         118173 $addr += $instr->size; # both code and data
251             }
252             else {
253 0         0 $addr++; # undefined
254             }
255             }
256             }
257             }
258              
259             sub _merge_instr {
260 193     193   277 my($self, $new_instr) = @_;
261            
262 193         264 my @comments;
263 193 50       365 push @comments, $new_instr->comment if defined $new_instr->comment;
264 193         1623 for my $addr ($new_instr->addr .. $new_instr->next_addr - 1) {
265 511         5068 my $old_instr = $self->instr->[$addr];
266 511 100       3961 if ($old_instr) {
267             # copy comments
268 406 100       625 push @comments, $old_instr->comment if defined $old_instr->comment;
269            
270             # copy formats
271 406 50       5488 if (defined $old_instr->_format) {
272 0         0 for my $key (keys %{$old_instr->_format}) {
  0         0  
273             $new_instr->format->{$key} =
274 0         0 $old_instr->format->{$key};
275             }
276             }
277            
278             # delete old
279 406         3186 $self->instr->[$addr] = undef;
280             }
281             }
282 193 100       2251 $new_instr->comment(join("\n", @comments)) if @comments;
283 193         1459 $self->instr->[$new_instr->addr] = $new_instr;
284            
285 193         2931 return $new_instr;
286             }
287              
288             sub _limit_addr {
289 1037     1037   1587 my($self, $addr) = @_;
290 1037         1750 my $label = $self->labels->next_label($addr);
291 1037 50       9733 my $limit_addr = (defined $label) ? $label->addr : 0x10000;
292 1037         9263 return $limit_addr;
293             }
294              
295             #------------------------------------------------------------------------------
296              
297             =head2 write_asm
298              
299             Outputs a disassembly listing on the given file, or standard output if no file
300             provided.
301              
302             The disassembly listing can be assembled to obtain the original binary file.
303              
304             =cut
305              
306             #------------------------------------------------------------------------------
307             sub write_asm {
308 9     9 1 1510 my($self, $file) = @_;
309              
310 9         42 my $fh = _opt_output_fh($file);
311              
312 9         61 $self->_write_header($fh);
313            
314 9         30 my $comment_it = $self->_block_comments_iter;
315 9         42 my $it = $self->memory->loaded_iter;
316 9         31 while (my($min, $max) = $it->()) {
317 7         26 my $instr = CPU::Z80::Disassembler::Instruction
318             ->org($self->memory, $min);
319 7         47 print $fh $instr->asm;
320            
321 7         193 for (my $addr = $min; $addr <= $max; ) {
322             # block comments
323 15057         252248 print $fh $comment_it->($addr);
324            
325 15057         29791 $addr = $self->_write_instr($fh, $addr, $max);
326             }
327            
328 7         110 print $fh "\n";
329             }
330            
331             # final comments
332 9         28 print $fh $comment_it->();
333            
334 9 100       43 print $fh $self->footer if defined $self->footer;
335              
336 9         123 $self->_write_map($fh);
337 9         51 $self->_write_labels($fh);
338 9         185 $self->_write_check_calls($fh);
339             }
340              
341             #------------------------------------------------------------------------------
342             # iterator to return block comments up to given address
343             sub _block_comments_iter {
344 9     9   19 my($self) = @_;
345 9         17 my $i = 0;
346             return sub {
347 15066     15066   22230 my($addr) = @_;
348 15066         18223 my $max = $#{$self->_block_comments};
  15066         29774  
349 15066 100       133957 $addr = $max unless defined $addr;
350            
351 15066         20807 my $return = "";
352 15066   100     43583 while ($i <= $addr && $i <= $max) {
353 24562         42291 my $comment = $self->_block_comments->[$i++];
354 24562 100       243925 $return .= $comment if defined $comment;
355             }
356 15066         30093 $return;
357 9         73 };
358             }
359              
360             #------------------------------------------------------------------------------
361 6     6   40 use constant BPL => 16;
  6         12  
  6         24188  
362              
363             #------------------------------------------------------------------------------
364             # write the file header and the label equates
365             sub _write_header {
366 9     9   32 my($self, $fh) = @_;
367            
368 9         42 my $label_width = $self->labels->max_length + 1;
369            
370 9 100       226 print $fh $self->header if defined $self->header;
371            
372 9         150 my @labels = sort { $a->addr <=> $b->addr } $self->labels->search_all;
  14990         192328  
373 9         389 for my $label (@labels) {
374 2031 100       26686 next if defined $self->instr->[$label->addr]; # no need for EQU
375 120         1715 print $fh $label->equ_string($label_width);
376             }
377 9 100       134 print $fh "\n" if @labels;
378            
379             # create IX0 / IY0 base
380 9         20 my $printed_base;
381 9         48 for (['IX0', 'ix_base'], ['IY0', 'iy_base']) {
382 18         39 my($base, $func) = @$_;
383 18         68 my $addr = $self->$func;
384 18 100       184 if (defined $addr) {
385 2         12 my $label = $self->labels->search_addr($addr);
386 2 50       31 if (defined $label) {
387 2         9 $addr = $label->name;
388             }
389             else {
390 0         0 $addr = format_hex4($addr);
391             }
392            
393 2         24 print $fh sprintf("%-*s equ %s\n", $label_width-1, $base, $addr);
394 2         7 $printed_base++;
395             }
396             }
397              
398 9 100       94 print $fh "\n" if $printed_base;
399             }
400            
401             #------------------------------------------------------------------------------
402             # write one instruction
403             sub _write_instr {
404 15057     15057   25598 my($self, $fh, $addr, $max) = @_;
405            
406             # label
407 15057         28396 my $label = $self->labels->search_addr($addr);
408 15057 100       138777 print $fh "\n", $label->label_string if (defined $label);
409              
410 15057         28801 my $instr = $self->instr->[$addr];
411 15057 100       130710 if (defined $instr) {
412             # instruction
413 15018 100 100     29308 if (defined($instr->NN) && !defined($instr->format->{NN})) {
    100 66        
414             # nac the special case of 16-bit (defw) values which can
415             # nac potentially be converted to a label
416 1172 100       11638 if (ref($instr->NN)) {
417 215         1779 my $max = scalar(@{$instr->NN});
  215         384  
418 215         2009 for (my $i=0; $i<$max; $i++) {
419 219         433 my $NN = $instr->NN->[$i];
420 219         1969 my $ref_label = $self->labels->search_addr($NN);
421 219 100       2160 if (defined($ref_label)) {
422 216         502 $instr->NN->[$i] = $ref_label->name;
423             $instr->format->{NN} =
424 219     219   371 sub { my $foo=shift;
425 219 100       558 if (/^\d+$/) {return format_hex4($foo)}
  3         6  
426 216         1045 else {return $foo}
427 216         4205 };
428             }
429             }
430             }
431             else {
432 957         8199 my $NN = $instr->NN;
433 957         8339 my $ref_label = $self->labels->search_addr($NN);
434 957 100       8915 if (defined($ref_label)) {
435 646     646   3306 $instr->format->{NN} = sub { $ref_label->name };
  646         1582  
436             }
437             }
438             }
439             elsif (defined($instr->DIS) && !defined($instr->format->{DIS})) {
440 379         4731 for (['ix', 'ix_base'], ['iy', 'iy_base']) {
441 758         9509 my($reg, $func) = @$_;
442 758 100 100     1549 if ($instr->opcode =~ /$reg/ && defined(my $base = $self->$func)) {
443 292         7626 my $addr = $base + $instr->DIS;
444 292         3065 my $ref_label = $self->labels->search_addr($addr);
445 292 100       2966 if (defined $ref_label) {
446             $instr->format->{DIS} =
447 269     269   1700 sub { '+'.$ref_label->name.'-'.uc($reg).'0' };
  269         682  
448             }
449             }
450             }
451             }
452 15018         256606 print $fh $instr->asm;
453            
454 15018         212568 return $instr->next_addr;
455             }
456             else {
457             # block of defb
458              
459             # search for next defined instr
460 39         52 my $p;
461 39   100     128 for ($p = $addr; $p <= $max && ! defined($self->instr->[$p]) ; $p++) {
462             ;
463             }
464              
465 39         281287 my $comment = "unknown area ".format_hex4($addr)." to ".format_hex4($p-1);
466 39         120 print $fh "\n", " " x 8, "; Start of $comment\n";
467            
468             # print for $addr in blocks of 16
469 39         102 while ($addr < $p) {
470 2496         3732 my $max_count = $p - $addr;
471 2496         3436 my $count = BPL - ($addr % BPL); # until end of addr block
472 2496 100       3764 $count = $max_count if $count > $max_count; # until $p
473            
474 2496         4964 my $instr = CPU::Z80::Disassembler::Instruction
475             ->defb($self->memory, $addr, $count);
476 2496         6093 print $fh $instr->asm;
477 2496         34207 $addr += $count;
478             }
479              
480 39         115 print $fh " " x 8, "; End of $comment\n\n";
481            
482 39         114 return $addr;
483             }
484             }
485              
486             #------------------------------------------------------------------------------
487             sub _write_map {
488 9     9   28 my($self, $fh) = @_;
489            
490 9         27 my $it = $self->memory->loaded_iter;
491 9         30 while (my($min, $max) = $it->()) {
492 7         26 for my $addr ($min .. $max-1) {
493 65553 100 100     176222 if ($addr == $min || ($addr % 0x50) == 0) {
494 823         2141 print $fh "\n; ", format_hex4($addr), " ";
495             }
496 65553         103763 print $fh $self->get_type($addr);
497             }
498 7         51 print $fh "\n";
499             }
500             }
501              
502             #------------------------------------------------------------------------------
503             sub _write_labels {
504 9     9   27 my($self, $fh) = @_;
505            
506 9         34 my @labels = $self->labels->search_all;
507 9 100       396 return unless @labels;
508            
509 6         38 my $len = $self->labels->max_length;
510            
511 6         196 my @by_name = sort { lc($a->name) cmp lc($b->name) } @labels;
  6915         101856  
512 6         296 my @by_addr = sort { $a->addr <=> $b->addr } @labels;
  14990         217444  
513              
514 6         308 print $fh "\n; Labels\n;\n";
515 6         33 for (0 .. $#labels) {
516 2031         4253 print $fh "; ", format_hex4($by_addr[$_]->addr), " => ",
517             sprintf("%-${len}s", $by_addr[$_]->name),
518             " " x 8,
519             sprintf("%-${len}s", $by_name[$_]->name), " => ",
520             format_hex4($by_name[$_]->addr), "\n";
521             }
522             }
523              
524             #------------------------------------------------------------------------------
525             sub _write_check_calls {
526 9     9   27 my($self, $fh) = @_;
527              
528 9         18 my %unknown_calls;
529 9         18 for my $addr (keys %{$self->_can_call}) {
  9         83  
530 359 100       3392 $unknown_calls{$addr}++ unless $self->_can_call->{$addr};
531             }
532 9         102 for my $addr (keys %{$self->_call_instr}) {
  9         40  
533 10         91 my $instr = $self->_get_instr($addr);
534 10         98 $unknown_calls{$instr->NN}++;
535             }
536            
537 9 100       357 if (%unknown_calls) {
538             print $fh "\n\n; Check these calls manualy: ",
539 3         16 join(", ", sort map {format_hex4($_)} keys %unknown_calls),
  22         37  
540             "\n\n";
541             }
542             }
543              
544             #------------------------------------------------------------------------------
545             sub _opt_output_fh {
546 18     18   40 my($file) = @_;
547            
548             # open file
549 18         35 my $fh;
550 18 100       56 if (defined $file) {
551 15 50       1275 open($fh, ">", $file) or croak("write $file: $!");
552             }
553             else {
554 3         5 $fh = \*STDOUT;
555             }
556              
557 18         75 $fh;
558             }
559              
560             #------------------------------------------------------------------------------
561              
562             =head2 set_type_code, set_type_byte, set_type_word
563              
564             Sets the type of the given address. An optional count allows the definitions of
565             the type of consecutive memory locations.
566              
567             It is an error to set a type of a not-defined memory location,
568             or to redefine a type.
569              
570             =cut
571              
572             #------------------------------------------------------------------------------
573             sub _set_type {
574 17823     17823   38545 my($self, $type, $addr, $count) = @_;
575 17823   100     33334 $count ||= 1;
576            
577 17823 50       82149 croak("Invalid type $type") unless $type =~ /$TYPES_RE/;
578            
579 17823         41298 for ( ; $count > 0 ; $count--, $addr++ ) {
580 30580         161611 my $current_type = $self->get_type($addr);
581            
582 30579 100 100     71040 croak("Changing type of address ".format_hex4($addr)." from ".
      100        
583             "$current_type to $type")
584             if ($current_type ne TYPE_UNKNOWN &&
585             $type ne TYPE_UNKNOWN &&
586             $current_type ne $type);
587            
588 30578         60190 $self->_type->poke($addr, ord($type));
589             }
590             }
591 12202     12202 1 115679 sub set_type_code { shift->_set_type( TYPE_CODE, @_ ) }
592 4968     4968 1 45646 sub set_type_byte { shift->_set_type( TYPE_BYTE, @_ ) }
593 316     316 1 2919 sub set_type_word { shift->_set_type( TYPE_WORD, @_ ) }
594            
595             #------------------------------------------------------------------------------
596              
597             =head2 get_type
598              
599             Gets the type at the given address, one of TYPE_UNKNOWN, TYPE_CODE, TYPE_BYTE or
600             TYPE_WORD constants.
601              
602             It is an error to set a type of a not-defined memory location.
603              
604             =cut
605              
606             #------------------------------------------------------------------------------
607             sub get_type {
608 133086     133086 1 387525 my($self, $addr) = @_;
609            
610 133086 100       230900 croak("Getting type of unloaded memory at ".format_hex4($addr))
611             unless defined $self->memory->peek($addr);
612            
613 133085         1390422 my $current_type = $self->_type->peek($addr);
614 133085 100       1258336 $current_type = defined($current_type) ? chr($current_type) : TYPE_UNKNOWN;
615            
616 133085 50       499257 croak("Invalid type $current_type") unless $current_type =~ /$TYPES_RE/;
617            
618 133085         319516 return $current_type;
619             }
620              
621             #------------------------------------------------------------------------------
622              
623             =head2 set_call
624              
625             Declares a subroutine at the given address, either with no stack impact
626             (if 1 is passed as argument) or with a stack impact to be computed by the
627             given code reference. This function is called with $self and the address
628             after the call instruction as arguments and should return the next address(es)
629             where the code stream shall continue.
630              
631             =cut
632              
633             #------------------------------------------------------------------------------
634             sub set_call {
635 33     33 1 354 my($self, $addr, $can_call) = @_;
636 33         48 $self->_can_call->{$addr} = $can_call;
637             }
638              
639             #------------------------------------------------------------------------------
640              
641             =head2 code
642              
643             Declares the given address and all following instructions up to an unconditional
644             jump as a block of code, with an optional label.
645              
646             =cut
647              
648             #------------------------------------------------------------------------------
649             sub _get_instr {
650 157888     157888   275497 my($self, $addr) = @_;
651              
652             # read from cache or disassemble
653 157888   66     289818 $self->instr->[$addr] ||=
654             CPU::Z80::Disassembler::Instruction->disassemble($self->memory, $addr);
655             }
656            
657             sub code {
658 8593     8593 1 47437 my($self, $addr, $label) = @_;
659              
660 8593 100       17333 defined($label) and $self->labels->add($addr, $label);
661            
662 8593         14826 my @stack = ($addr); # all addresses to investigate
663            
664             # check calls
665 8593         15553 while (@stack) {
666             # follow all streams of code
667 8783         15361 while (@stack) {
668 23294         35915 my $addr = pop @stack;
669            
670             # if address is not loaded, assume a ROM entry point
671 23294 100       48833 if (!defined $self->memory->peek($addr)) {
672 2 50       19 if (!$self->labels->search_addr($addr)) {
673 0         0 my $instr = $self->labels->add($addr);
674             }
675 2         22 next;
676             }
677            
678             # skip if already checked
679 23292 100       265486 next if $self->get_type($addr) eq TYPE_CODE;
680            
681             # get instruction and mark as code
682 12199         25969 my $instr = $self->_get_instr($addr);
683 12199         33280 $self->set_type_code($addr, $instr->size);
684            
685             # create labels for branches (jump or call)
686 12199 100       140298 if ($instr->is_branch) {
687 3334         44915 my $branch_addr = $instr->NN;
688 3334         31375 my $label = $self->labels->add($branch_addr, undef, $addr);
689 3334     2510   18392 $instr->format->{NN} = sub { $label->name };
  2510         5983  
690             }
691            
692             # check call / rst addresses
693 12199 100       142719 if ($instr->is_call) {
694 1748         22691 my $call_addr = $instr->NN;
695 1748         16005 my $can_call = $self->_can_call->{$call_addr};
696 1748 100       19064 if (! defined $can_call) {
    100          
    100          
697 400         924 $self->_call_instr->{$addr}++; # mark road block
698             }
699             elsif (ref $can_call) {
700 116         368 push @stack, $can_call->($self, $instr->next_addr);
701             # call sub to handle impact
702             }
703             elsif ($can_call) {
704 1156         2901 push @stack, $instr->next_addr; # can continue
705             }
706             }
707            
708             # continue on next addresses
709 12199         138835 push @stack, $instr->next_code;
710             }
711            
712             # check if we can unwind any blocked calls, after all paths without calls are
713             # exhausted
714 8783         18538 push @stack, $self->_check_call_instr;
715             }
716             }
717              
718             #------------------------------------------------------------------------------
719             sub _check_call_instr {
720 8783     8783   13661 my($self) = @_;
721              
722 8783         10654 my @stack;
723            
724             # check simple call instructions where we blocked
725 8783         10379 for my $addr (keys %{$self->_call_instr}) {
  8783         16989  
726 13641         58416 my $instr = $self->_get_instr($addr);
727 13641         146657 my $call_addr = $instr->NN;
728            
729 13641 100 66     118662 if ( # if any of the calls is conditional, then _can_call
730             $instr->opcode =~ /call \w+,NN/
731             ||
732             # if address after the call is CODE, then _can_call
733             $self->get_type($instr->next_addr) eq TYPE_CODE
734             ) {
735            
736             # mark for later; do not call code() directly because we are
737             # iterating over _call_instr that might be changed by code()
738 48         163 $self->_can_call->{$call_addr} = 1;
739 48         535 push @stack, $instr->next_addr; # code from here
740 48         824 delete $self->_call_instr->{$addr}; # processed
741             }
742             }
743            
744             # check remaining by following code flow
745 8783         51157 for my $addr (keys %{$self->_call_instr}) {
  8783         16028  
746 13593         55734 my $instr = $self->_get_instr($addr);
747 13593         142266 my $call_addr = $instr->NN;
748            
749             # if call flow in called subroutine
750             # does not pop return address, than _can_call
751 13593         116031 my $can_call = $self->_check_call($call_addr);
752 13593 100       32496 if (defined $can_call) {
753 337         808 $self->_can_call->{$call_addr} = $can_call;
754 337         3145 push @stack, $addr; # re-check call to call can_call
755 337         771 $self->_set_type(TYPE_UNKNOWN, $addr, $instr->size);
756             # allow recheck to happen
757 337         3871 delete $self->_call_instr->{$addr}; # processed
758             }
759             }
760            
761 8783         83911 return @stack;
762             }
763              
764             #------------------------------------------------------------------------------
765             sub _check_call {
766 13593     13593   21619 my($self, $call_addr) = @_;
767            
768 13593         17207 my %seen; # addresses we have checked
769 13593         19976 my($addr, $sp_level) = ($call_addr, 0);
770 13593         28465 my @stack = ([$addr, $sp_level]); # all addresses to investigate
771            
772             # follow code
773 13593         26615 while (@stack) {
774 133342         1761083 ($addr, $sp_level) = @{pop @stack};
  133342         231246  
775 133342 100       367815 next if $seen{$addr}++; # prevent loops
776            
777             # run into some known code
778 118504         250871 my $can_call = $self->_can_call->{$addr};
779 118504 100       1065129 if (defined $can_call) {
780 84 100       516 return $can_call if $sp_level == 0;
781             }
782              
783             # if address is not loaded, return "dont know"
784 118421 100       214857 if (!defined $self->memory->peek($addr)) {
785 1         12 return undef;
786             }
787              
788             # get the instruction
789 118420         1315087 my $instr = $self->_get_instr($addr);
790 118420         1146663 local $_ = $instr->opcode;
791            
792             # check stack impact
793 118420 100       1370158 if (/ret/) {
    100          
    100          
    50          
    50          
    100          
    100          
794 244 100       1493 return 1 if $sp_level == 0; # can call if stack empty
795             }
796             elsif (/push/) {
797 15375         21910 $sp_level += 2;
798             }
799             elsif (/pop/) {
800 2314         3767 $sp_level -= 2;
801 2314 100       4701 return 0 if $sp_level < 0; # STACK IMPACT!
802             }
803             elsif (/dec sp/) {
804 0         0 $sp_level++;
805             }
806             elsif (/inc sp/) {
807 0         0 $sp_level--;
808 0 0       0 return 0 if $sp_level < 0; # STACK IMPACT!
809             }
810             elsif (/ex \(sp\),/) {
811 25 100       88 return 0 if $sp_level < 2; # STACK IMPACT!
812             }
813             elsif (/ld sp/) {
814 1         7 return 0; # STACK IMPACT!
815             }
816            
817             # continue on next address, but dont follow calls
818 118166 100       245089 if ($instr->is_call) {
    100          
819 15426         186170 my $can_call = $self->_can_call->{$instr->NN};
820 15426 100 100     278297 if (defined($can_call) && !ref($can_call) && $can_call) {
      100        
821 2472         5637 push @stack, [$instr->next_addr, $sp_level]; # continue after call
822             }
823             }
824             elsif ($instr->is_branch) {
825 17411         207008 push @stack, [$instr->NN, $sp_level];
826             }
827            
828 118166 100       1123822 push @stack, [$instr->next_addr, $sp_level] unless $instr->is_break_flow;
829             }
830            
831 13255         155388 return undef; # don't know
832             }
833              
834             #------------------------------------------------------------------------------
835              
836             =head2 defb, defb2, defw, defm, defmz, defm7
837              
838             Declares the given address as a def* instruction
839             with an optional label.
840              
841             =cut
842              
843             #------------------------------------------------------------------------------
844             sub _def {
845 5283     5283   9417 my($self, $factory, $set_type,
846             $addr, $count, $label) = @_;
847              
848 5283 100       9356 defined($label) and $self->labels->add($addr, $label);
849            
850 5283         10971 my $instr = CPU::Z80::Disassembler::Instruction
851             ->$factory($self->memory, $addr, $count);
852 5283         14978 $self->instr->[$addr] = $instr;
853 5283         49882 $self->$set_type($addr, $instr->size);
854            
855 5283         58111 return $instr;
856             }
857              
858             sub defb {
859 4761     4761 1 31240 my($self, $addr, $count, $label) = @_;
860 4761         9235 $self->_def('defb', 'set_type_byte', $addr, $count, $label);
861             }
862              
863             sub defb2 {
864 0     0 1 0 my($self, $addr, $count, $label) = @_;
865 0         0 $self->_def('defb2', 'set_type_byte', $addr, $count, $label);
866             }
867              
868             sub defw {
869 316     316 1 921 my($self, $addr, $count, $label) = @_;
870 316         672 $self->_def('defw', 'set_type_word', $addr, $count, $label);
871             }
872              
873             sub defm {
874 78     78 1 1045 my($self, $addr, $length, $label) = @_;
875 78         171 $self->_def('defm', 'set_type_byte', $addr, $length, $label);
876             }
877              
878             sub defmz {
879 0     0 1 0 my($self, $addr, $count, $label) = @_;
880 0         0 $self->_def('defmz', 'set_type_byte', $addr, $count, $label);
881             }
882              
883             sub defm7 {
884 128     128 1 2295 my($self, $addr, $count, $label) = @_;
885 128         260 $self->_def('defm7', 'set_type_byte', $addr, $count, $label);
886             }
887              
888             #------------------------------------------------------------------------------
889              
890             =head2 block_comment
891              
892             Creates a block comment to insert before the given address.
893              
894             =cut
895              
896             #------------------------------------------------------------------------------
897             sub block_comment {
898 12525     12525 1 233631 my($self, $addr, $block_comment) = @_;
899            
900 12525 100       24594 if (defined $block_comment) {
901 6060         7859 chomp($block_comment);
902 6060   100     11044 $self->_block_comments->[$addr] ||= "";
903 6060         63919 $self->_block_comments->[$addr] .= "$block_comment\n";
904             }
905             }
906              
907             #------------------------------------------------------------------------------
908              
909             =head2 line_comments
910              
911             Appends each of the given line comments to the instrutions starting at
912             the given address, one comment per instruction.
913              
914             =cut
915              
916             #------------------------------------------------------------------------------
917             sub line_comments {
918 6747     6747 1 12579 my($self, $addr, @line_comments) = @_;
919            
920 6747         10878 for (@line_comments) {
921 6747         13155 my $instr = $self->instr->[$addr];
922 6747 100       63250 croak("Cannot set comment of unknown instruction at ".format_hex4($addr))
923             unless $instr;
924 6746   100     13516 my $old_comment = $instr->comment // "";
925 6746 100       65020 $old_comment .= "\n" if $old_comment;
926 6746         18807 $instr->comment($old_comment . $_);
927 6746         67606 $addr += $instr->size;
928             }
929             }
930              
931             #------------------------------------------------------------------------------
932              
933             =head2 relative_arg
934              
935             Shows the instruction argument (NN or N) relative to a given label name.
936             Label name can be '$' for a value relative to the instruction pointer.
937              
938             =cut
939              
940             #------------------------------------------------------------------------------
941              
942             =head2 create_control_file
943              
944             $dis->create_control_file($ctl_file, $bin_file, $addr, $arch);
945              
946             Creates a new control file for the given input binary file, starting at the given address
947             and for the given architecture.
948              
949             The address defaults to zero, and the architecture to undefined. The architecture may be
950             implemented in the future, for example to define system variable equates for the given
951             architecture.
952              
953             It is an error to overwrite a control file.
954              
955             The Control File is the input file for a disassembly run in an interactive disassembly
956             session, and the outout is the .asm. After each run, the user studies the output
957             .asm file, and includes new commands in the control file to add information to the
958             .asm file on the next run.
959              
960             This function creates a template control file that contains just the hex dump of the
961             binary file and the decoded assembly instruction at each address, e.g.
962              
963             0000 :F
964             0000 D3FD out ($FD),a
965             0002 01FF7F ld bc,$7FFF
966             0005 C3CB03 jp $03CB
967              
968             The control file commands start with a ':' and refer to the hexadecimal address at the
969             start of the line.
970              
971             Some commands operate on a range of addresses and accept the inclusive range limits separated
972             by a single '-'.
973              
974             A line starting with a blank uses the same address as the previous command.
975              
976             A semicolon starts a comment in the control file.
977              
978             0000 :; define next address as 0x0000
979             : ; at the same address 0x0000
980             0000-001F :B ; define a range address of bytes
981              
982             The dump between the address and the ':' is ignored and is helpfull as a guide while adding
983             information to the control file.
984              
985             =head2 load_control_file
986              
987             $dis->load_control_file($ctl_file);
988              
989             Load the control file created by and subsequently edited by the user
990             and create a new .asm disassembly file.
991              
992             =head1 Control File commands
993              
994             =head2 Include
995              
996             Include another control file at the current location.
997              
998             #include vars.ctl
999              
1000             =head2 File
1001              
1002             Load a binary file at the given address.
1003              
1004             0000 :F zx81.rom
1005              
1006             =head2 Code
1007              
1008             Define the start of a code routine, with an optional label. The code is not known to be
1009             stack-safe, i.e. not to have data bytes following the call instruction. The disassembler
1010             stops disassembly when it cannot determine if the bytes after a call instruction are
1011             data or code.
1012              
1013             0000 :C START
1014              
1015             =head2 Procedure
1016              
1017             Define the start of a procedure with a possible list of arguments following the call
1018             instruction.
1019              
1020             The signature is a list of {'B','W','C'}+, identifing each of the following items
1021             after the call instruction (Byte, Word or Code). In the following example the call
1022             istruction is followed by one byte and one word, and the procedure returns
1023             to the address after the word.
1024              
1025             0000 P proc B,W,C
1026              
1027             The signature defaults to a single 'C', meaning the procedure returns to the point after call.
1028              
1029             A signature without a 'C' means that the call never returns.
1030              
1031             =head2 Bytes and Words
1032              
1033             Define data bytes and words in the given address range.
1034              
1035             0000-0003 :B label
1036             0000-0003 :B label
1037             0000-0003 :B2[1] label ; one byte per line, binary data
1038             0000-0003 :W label
1039              
1040             =head2 Define a symbol
1041              
1042             Define the name of a symbol.
1043              
1044             4000 := ERR_NO comment\nline 2 of comment
1045              
1046             =head2 IX and IY base
1047              
1048             Define base address for IX and IY indexed mode.
1049              
1050             4000 :IX
1051             4000 :IY
1052              
1053             =head2 Header block
1054              
1055             Define a text block to be output before the given address. The block is inserted vervbatin,
1056             so include ';' if a comment is intended.
1057              
1058             0000 :# ; header
1059             :# ; continuation
1060             :# abc EQU 23
1061              
1062             =head2 Line comment
1063              
1064             Define a line comment to show at the given address.
1065              
1066             0000 :; comment
1067              
1068             =head2 Header and Footer
1069              
1070             Define a text block to be output at the top and the bottom of the assembly file.
1071             The block is inserted vervbatin, so include ';' if a comment is intended.
1072              
1073             0000 :< ; header
1074             :< ; continuation
1075             :> ; footer
1076              
1077             =cut
1078              
1079             #------------------------------------------------------------------------------
1080              
1081             sub _find_file {
1082 5     5   90 my($self, $from_file, $include_file) = @_;
1083            
1084 5 100       105 return $include_file if -f $include_file;
1085            
1086             # test relative to parent
1087 1         8 my $relative = path(path($from_file)->parent, path($include_file)->basename);
1088 1 50       191 return $relative if -f $relative;
1089            
1090 0         0 return $from_file;
1091             }
1092              
1093             #------------------------------------------------------------------------------
1094              
1095             sub create_control_file {
1096 5     5 1 22211735 my($class, $ctl_file, $bin_file, $addr, $arch) = @_;
1097            
1098 5 100       104 -f $ctl_file and die "Error: $ctl_file exists\n";
1099            
1100 4         29 my $dis = $class->new;
1101 4         26 $dis->memory->load_file($bin_file, $addr);
1102 4         67 $dis->write_dump($ctl_file);
1103 4         53 my @lines = ( <
1104             ;------------------------------------------------------------------------------
1105             ; CPU::Z80::Disassembler control file
1106             ;------------------------------------------------------------------------------
1107              
1108             END
1109             sprintf("%04X :F $bin_file\n\n", $addr),
1110             path($ctl_file)->lines
1111             );
1112 4         16299 path($ctl_file)->spew(@lines);
1113             }
1114              
1115             #------------------------------------------------------------------------------
1116              
1117             sub load_control_file {
1118 5     5 1 380 my($self, $file) = @_;
1119            
1120 5         14 my $addr = 0; my $end_addr = 0;
  5         14  
1121 5 50       174 open(my $fh, $file) or die "cannot open $file\n";
1122 5         197 while (<$fh>) {
1123 41525         154755 chomp;
1124 41525         66381 s/^\s*;.*$//; # remove comments
1125 41525         73863 s/\s+$//;
1126 41525 100       91135 next unless /\S/;
1127              
1128 36285 100       58759 if (/^ \#include \s+ (\S+) /ix) {
1129 1         10 $self->load_control_file($self->_find_file($file, $1));
1130             }
1131             else {
1132             # decode start address
1133 36284 100       93851 if (s/^ ([0-9a-f]+) //ix) {
1134 26213         46263 $addr = hex($1);
1135             }
1136              
1137             # decode end address
1138 36284         45877 $end_addr = $addr;
1139 36284 100       57267 if (s/^ -([0-9a-f]+) //ix) {
1140 1463         2265 $end_addr = hex($1);
1141             }
1142              
1143             # remove all chars up to ':', ignore lines without ':'
1144 36284 100       90164 /:\s*/ or next;
1145 20549         43763 $_ = $';
1146 20549 50       41710 next unless /\S/;
1147            
1148             # decode command
1149 20549         27627 my($include_file, $label, $comment, $signature, $type);
1150            
1151             # File
1152 20549 100       113106 if (($include_file) = /^ F \s+ (\S+) /ix) {
    100          
    100          
    100          
    100          
    50          
    100          
    50          
    100          
    50          
    50          
1153 4         26 $self->memory->load_file($self->_find_file($file, $include_file), $addr);
1154             }
1155            
1156             # Code
1157             elsif (($label) = /^ C \s* (\w+)? /ix) {
1158 7506         17062 $self->code($addr, $label);
1159             }
1160            
1161             # Define label
1162             elsif (($label, $comment) = /^ = \s+ (\S+) \s* ;? \s*(.*)/ix) {
1163 40         83 $comment =~ s/ \\ n /\n/gx;
1164 40         71 my $instr = $self->labels->add($addr, $label);
1165 40 50       81 $instr->comment($comment) if $comment;
1166             }
1167            
1168             # Block comment
1169             elsif (($comment) = /^ \# \s? (.*)/ix) {
1170 3270         6248 $self->block_comment($addr, $comment);
1171             }
1172            
1173             # Header
1174             elsif (($comment) = /^ \< \s? (.*)/ix) {
1175 56   100     114 my $header = $self->header // "";
1176 56 100       511 $header .= "\n" if $header;
1177 56         129 $self->header($header.$comment);
1178             }
1179            
1180             # Footer
1181             elsif (($comment) = /^ \> \s? (.*)/ix) {
1182 0   0     0 my $footer = $self->footer // "";
1183 0 0       0 $footer .= "\n" if $footer;
1184 0         0 $self->footer($footer.$comment);
1185             }
1186            
1187             # Line comment
1188             elsif (($comment) = /^ \; [\s;]* (.*)/ix) {
1189 6746         14449 $self->line_comments($addr, $comment);
1190             }
1191            
1192             # Procedure
1193             elsif (($label, $signature) = /^ P \s+ (\w+) \s* (.*)/ix) {
1194 0         0 $self->code($addr, $label);
1195 0         0 $signature =~ s/,/ /g;
1196 0         0 my @types = split(' ', $signature);
1197 0 0       0 @types = ('C') if !@types;
1198             $self->set_call($addr, sub {
1199 0     0   0 my($self, $addr) = @_;
1200 0         0 for (@types) {
1201 0 0       0 if ($_ eq 'B') {
    0          
    0          
1202 0         0 $self->defb($addr);
1203 0         0 $addr++
1204             }
1205             elsif ($_ eq 'W') {
1206 0         0 $self->defW($addr);
1207 0         0 $addr += 2;
1208             }
1209             elsif ($_ eq 'C') {
1210 0         0 return $addr;
1211             }
1212             else {
1213 0         0 die "procedure argument type $_ unknown";
1214             }
1215             }
1216 0         0 return;
1217 0         0 });
1218             }
1219            
1220             # Byte | Word
1221             elsif (my($type, $ipl, $label) = /^ (B2 | B | W | M) (?: \[ (\d+) \] )? \s* (\w+)?/ix) {
1222 2926 100       5579 $self->labels->add($addr, $label) if defined $label;
1223 2926 50       4887 $ipl = 16 unless $ipl;
1224              
1225 2926         3429 my($func, $size);
1226 2926 100       4790 if ($type eq 'B') { ($func, $size) = ('defb', 1); }
  2724 50       3960  
    50          
    0          
1227 0         0 elsif ($type eq 'B2') { ($func, $size) = ('defb2', 1); }
1228 202         305 elsif ($type eq 'W') { ($func, $size) = ('defw', 2); }
1229 0         0 elsif ($type eq 'M') { ($func, $size) = ('defm', 1); $ipl = 32; }
  0         0  
1230 0         0 else { die "type $type unknown"; }
1231            
1232 2926 100 100     5115 if ($size == 2 && $addr == $end_addr) {
1233 101         135 $end_addr++; # a word uses two addresses
1234             }
1235            
1236 2926         4978 for (my $a = $addr; $a <= $end_addr; ) {
1237 2926         5345 my $items = int(($end_addr - $a + 1) / $size);
1238 2926 50       4411 $items = $ipl if $items > $ipl;
1239            
1240 2926         7383 $self->$func($a, $items);
1241 2926         14558 $a += $size * $items;
1242             }
1243             }
1244            
1245             # IX
1246             elsif (/^ IX /ix) {
1247 0         0 $self->ix_base($addr);
1248             }
1249            
1250             # IY
1251             elsif (/^ IY /ix) {
1252 1         4 $self->iy_base($addr);
1253             }
1254            
1255             # undefined
1256             else {
1257 0         0 die "Load '$file': cannot parse '$_'";
1258             }
1259             }
1260             }
1261             }
1262            
1263             #------------------------------------------------------------------------------
1264             sub relative_arg {
1265 25     25 1 374 my($self, $addr, $label_name) = @_;
1266              
1267             # disassemble from here, if needed
1268 25         73 $self->code($addr);
1269 25 50       62 my $instr = $self->_get_instr($addr) or die;
1270            
1271 25         282 my $label_addr;
1272 25 100       48 if ($label_name eq '$') {
1273 3         9 $label_addr = $instr->addr;
1274             }
1275             else {
1276 22 100       98 my $label = $self->labels->search_name($label_name)
1277             or croak("Label '$label_name' not found");
1278 21         244 $label_addr = $label->addr;
1279             }
1280            
1281 24 100       241 my $NN = defined($instr->NN) ? 'NN' :
    100          
1282             defined($instr->N ) ? 'N' :
1283             croak("Instruction at address ".format_hex4($addr).
1284             " has no arguments");
1285 23         319 my $arg = $instr->$NN;
1286 23 50       247 $arg = [$arg] unless ref $arg; # defb stores as [N]
1287            
1288 23         41 my $delta = $arg->[0] - $label_addr;
1289 23         69 my $expr = $label_name . format_dis($delta);
1290 23     23   138 $instr->format->{$NN} = sub { $expr };
  23         156  
1291             }
1292              
1293             #------------------------------------------------------------------------------
1294              
1295             =head1 ACKNOWLEDGEMENTS
1296              
1297             =head1 AUTHOR
1298              
1299             Paulo Custodio, C<< >>
1300              
1301             =head1 BUGS and FEEDBACK
1302              
1303             Please report any bugs or feature requests through
1304             the web interface at
1305             L.
1306              
1307             =head1 LICENSE AND COPYRIGHT
1308              
1309             Copyright 2010 Paulo Custodio.
1310              
1311             This program is free software; you can redistribute it and/or modify it
1312             under the terms of either: the GNU General Public License as published
1313             by the Free Software Foundation; or the Artistic License.
1314              
1315             See http://dev.perl.org/licenses/ for more information.
1316              
1317             The Spectrum 48K ROM used in the test scripts is Copyright by Amstrad.
1318             Amstrad have kindly given their permission for the
1319             redistribution of their copyrighted material but retain that copyright
1320             (see L).
1321              
1322             =cut
1323              
1324             1;