File Coverage

blib/lib/CPU/Z80/Disassembler.pm
Criterion Covered Total %
statement 415 453 91.6
branch 195 234 83.3
condition 46 52 88.4
subroutine 55 58 94.8
pod 21 21 100.0
total 732 818 89.4


line stmt bran cond sub pod time code
1             package CPU::Z80::Disassembler;
2              
3             #------------------------------------------------------------------------------
4              
5             =head1 NAME
6              
7             CPU::Z80::Disassembler - Disassemble the flow of a Z80 program
8              
9             =cut
10              
11             #------------------------------------------------------------------------------
12              
13 6     6   1027334 use strict;
  6         47  
  6         189  
14 6     6   48 use warnings;
  6         13  
  6         148  
15              
16 6     6   28 use Carp; our @CARP_NOT; # do not report errors in this package
  6         22  
  6         460  
17              
18 6     6   2925 use CPU::Z80::Disassembler::Memory;
  6         19  
  6         51  
19 6     6   3248 use CPU::Z80::Disassembler::Instruction;
  6         23  
  6         44  
20 6     6   247 use CPU::Z80::Disassembler::Format;
  6         13  
  6         350  
21 6     6   3585 use CPU::Z80::Disassembler::Labels;
  6         18  
  6         32  
22              
23 6     6   2943 use Path::Tiny;
  6         37940  
  6         505  
24              
25             our $VERSION = '1.00';
26              
27             #------------------------------------------------------------------------------
28              
29             =head1 SYNOPSIS
30              
31             use CPU::Z80::Disassembler;
32             $dis = CPU::Z80::Disassembler->new;
33             $dis->memory->load_file($file_name, $addr, $opt_skip_bytes, $opt_length);
34             $dis->write_dump; $dis->write_dump($file);
35             $dis->analyse;
36             $dis->write_asm; $dis->write_asm($file);
37              
38             $dis->get_type($addr);
39             $dis->set_type_code($addr [,$count]);
40             $dis->set_type_byte($addr [,$count]);
41             $dis->set_type_word($addr [,$count]);
42              
43             $dis->set_call($addr, 1); # this may be called
44             $dis->set_call($addr, $sub); # @next_code = $sub->($self, $next_addr) will be called
45              
46             $dis->code($addr [, $label]);
47             $dis->defb($addr [, $count][, $label]);
48             $dis->defw($addr [, $count][, $label]);
49             $dis->defm($addr, $size [, $label]);
50             $dis->defmz($addr [, $count][, $label]);
51             $dis->defm7($addr [, $count][, $label]);
52              
53             $dis->block_comment($addr, $block_comment);
54             $dis->line_comments($addr, @line_comments);
55              
56             $dis->relative_arg($addr, $label_name);
57             $dis->ix_base($addr);
58             $dis->iy_base($addr);
59            
60             $dis->create_control_file($ctl_file, $bin_file, $addr, $arch);
61             $dis->load_control_file($ctl_file);
62              
63             =head1 DESCRIPTION
64              
65             Implements a Z80 disassembler. Loads a binary file into memory and dumps
66             an unprocessed disassembly listing (see C).
67              
68             Alternatively there are functions to tell the disassembler where there are
69             data bytes and what are code entry points and labels. The disassembler will
70             follow the code by simulating a Z80 processor, to find out where the code region
71             finishes.
72              
73             As a C instruction may be followed by data, the disassembler tries to find
74             out if the called routine manipulates the return stack. If it does not, and ends
75             with a C, then the routine is considered safe, and the disassembly continues
76             after the C instruction. If the routine is not considered safe, a message is
77             written at the end of the disassembled file asking the used to check the
78             routines manually; the C method should then be used to tell how to
79             handle calls to that routine on the next iteration.
80              
81             The C function can be called just before dumping the output to try to find
82             higher level constructs in the assembly listing. For example, it transforms the
83             sequence C into C.
84              
85             The C dumps an assembly listing that can be re-assembled to obtain the
86             starting binary file. All the unknown region bytes are disassembled as C
87             instructions, and a map is shown at the end of the file with the code regions (C),
88             byte regions (C), word regions (C) and unknown regions (C<->).
89              
90             =head1 FUNCTIONS
91              
92             =head2 new
93              
94             Creates the object.
95              
96             =head2 memory
97              
98             L object
99             containing the memory being analysed.
100              
101             =head2 instr
102              
103             Reference to an array that contains all the disassembled instructions
104             as L
105             objects, indexed
106             by the address of the instruction. The entry is C if there is no
107             disassembled instruction at that address (either not known, or pointing to the second,
108             etc, bytes of a multi-byte instruction).
109              
110             =head2 labels
111              
112             Returns the L
113             object that contains all the defined labels.
114              
115             =head2 header, footer
116              
117             Attributes containing blocks of text to dump before and after the assembly listing.
118             They are used by C.
119              
120             =head2 ix_base, iy_base
121              
122             Base addess for (IX+DIS) and (IY+DIS) instructions, if constant in all the code.
123             Causes the disassembly to dump:
124              
125             IY0 equ 0xHHHH ; 0xHHHH is iy_base
126             ...
127             ld a,(iy+0xHHHH-IY0) ; 0xHHHH is the absolute address
128              
129             =cut
130              
131             #------------------------------------------------------------------------------
132             # Hold a disassembly session
133 6     6   45 use base 'Class::Accessor';
  6         11  
  6         654  
134             __PACKAGE__->mk_accessors(
135             'memory', # memory to disassemble
136             '_type', # identified type of each memory address, TYPE_xxx
137             'instr', # array of Instruction objects at each address
138             'labels', # all defined labels
139             '_call_instr', # hash of all call instructions where we are blocked
140             '_can_call', # hash of all subroutines we may call:
141             # 1 : can be called, no stack impact
142             # 0 : has stack impact, needs to be checked manually
143             # sub {} : call sub->($self, $next_addr) to handle
144             # stack impact and return next code addresses
145             # to continue disassembly after call
146             '_block_comments',
147             # array of block comment string at each address, printed before
148             # the address
149             'header', 'footer',
150             # header and footer sections of disassembled file
151             'ix_base', 'iy_base',
152             # base addess for (IX+DIS) and (IY+DIS)
153             );
154              
155 6     6   44 use constant TYPE_UNKNOWN => '-';
  6         12  
  6         316  
156 6     6   35 use constant TYPE_CODE => 'C';
  6         16  
  6         282  
157 6     6   35 use constant TYPE_BYTE => 'B';
  6         13  
  6         286  
158 6     6   36 use constant TYPE_WORD => 'W';
  6         38  
  6         435  
159             my $TYPES_RE = qr/^[-CBW]$/;
160              
161 6     6   44 use Exporter 'import';
  6         12  
  6         6492  
162             our @EXPORT = qw( TYPE_UNKNOWN TYPE_CODE TYPE_BYTE TYPE_WORD );
163              
164              
165             sub new {
166 18     18 1 11404614 my($class) = @_;
167 18         185 my $memory = CPU::Z80::Disassembler::Memory->new;
168 18         157 my $type = CPU::Z80::Disassembler::Memory->new;
169 18         332 my $labels = CPU::Z80::Disassembler::Labels->new;
170 18         2083 return bless { memory => $memory,
171             _type => $type,
172             instr => [],
173             labels => $labels,
174             _call_instr => {},
175             _can_call => {},
176             _block_comments => [],
177             }, $class;
178             }
179             #------------------------------------------------------------------------------
180              
181             =head2 write_dump
182              
183             Outputs a disassembly dump on the given file, or standard output if no file
184             provided.
185              
186             The disassembly dump shows the address and bytes of each instruction with
187             the disassembled instruction.
188              
189             =cut
190              
191             #------------------------------------------------------------------------------
192              
193             sub write_dump {
194 9     9 1 5662 my($self, $file) = @_;
195              
196 9         43 my $fh = _opt_output_fh($file);
197            
198 9         45 my $it = $self->memory->loaded_iter;
199 9         21 my $instr;
200            
201 9         32 while (my($min, $max) = $it->()) {
202 7         33 for (my $addr = $min; $addr <= $max; $addr = $instr->next_addr) {
203             # either a Z80 instruction, or, if not found, a defb
204 42000   66     822139 $instr = CPU::Z80::Disassembler::Instruction->disassemble(
205             $self->memory, $addr)
206             || CPU::Z80::Disassembler::Instruction->defb(
207             $self->memory, $addr);
208 42000         116134 print $fh $instr->dump;
209             }
210             }
211             }
212              
213             #------------------------------------------------------------------------------
214              
215             =head2 analyse
216              
217             Analyse the disassembled information looking for higher level constructs.
218             For example, it replaces 'ld c,(hl):inc hl' by 'ldi c,(hl)'.
219              
220             Should be called immediately before C.
221              
222             =cut
223              
224             #------------------------------------------------------------------------------
225             sub analyse {
226 1     1 1 30 my($self) = @_;
227            
228             # search for composed instructions
229 1         7 my $it = $self->memory->loaded_iter;
230 1         7 my $limit_addr = $self->_limit_addr(0);
231 1         5 while (my($min, $max) = $it->()) {
232 1         5 for (my $addr = $min; $addr <= $max; ) {
233 8954         95423 my $instr = $self->instr->[$addr];
234 8954 50       90363 if (defined $instr) {
235 8954 100       18619 if ($instr->is_code) {
236            
237             # get address of next label
238 6598 100       74593 if ($addr >= $limit_addr) {
239 1036         2311 $limit_addr = $self->_limit_addr($addr + 1);
240             }
241            
242             # disassemble long instruction
243 6598         14271 my $long_instr = CPU::Z80::Disassembler::Instruction
244             ->disassemble($self->memory,
245             $addr, $limit_addr);
246 6598 100       14306 if ($instr->opcode ne $long_instr->opcode) {
247 193         3661 $instr = $self->_merge_instr($long_instr);
248             }
249             }
250 8954         153705 $addr += $instr->size; # both code and data
251             }
252             else {
253 0         0 $addr++; # undefined
254             }
255             }
256             }
257             }
258              
259             sub _merge_instr {
260 193     193   349 my($self, $new_instr) = @_;
261            
262 193         267 my @comments;
263 193 50       447 push @comments, $new_instr->comment if defined $new_instr->comment;
264 193         1974 for my $addr ($new_instr->addr .. $new_instr->next_addr - 1) {
265 511         6367 my $old_instr = $self->instr->[$addr];
266 511 100       5257 if ($old_instr) {
267             # copy comments
268 406 100       780 push @comments, $old_instr->comment if defined $old_instr->comment;
269            
270             # copy formats
271 406 50       7272 if (defined $old_instr->_format) {
272 0         0 for my $key (keys %{$old_instr->_format}) {
  0         0  
273             $new_instr->format->{$key} =
274 0         0 $old_instr->format->{$key};
275             }
276             }
277            
278             # delete old
279 406         3940 $self->instr->[$addr] = undef;
280             }
281             }
282 193 100       3110 $new_instr->comment(join("\n", @comments)) if @comments;
283 193         1838 $self->instr->[$new_instr->addr] = $new_instr;
284            
285 193         4093 return $new_instr;
286             }
287              
288             sub _limit_addr {
289 1037     1037   1801 my($self, $addr) = @_;
290 1037         2111 my $label = $self->labels->next_label($addr);
291 1037 50       13303 my $limit_addr = (defined $label) ? $label->addr : 0x10000;
292 1037         12587 return $limit_addr;
293             }
294              
295             #------------------------------------------------------------------------------
296              
297             =head2 write_asm
298              
299             Outputs a disassembly listing on the given file, or standard output if no file
300             provided.
301              
302             The disassembly listing can be assembled to obtain the original binary file.
303              
304             =cut
305              
306             #------------------------------------------------------------------------------
307             sub write_asm {
308 9     9 1 2131 my($self, $file) = @_;
309              
310 9         58 my $fh = _opt_output_fh($file);
311              
312 9         81 $self->_write_header($fh);
313            
314 9         49 my $comment_it = $self->_block_comments_iter;
315 9         57 my $it = $self->memory->loaded_iter;
316 9         46 while (my($min, $max) = $it->()) {
317 7         34 my $instr = CPU::Z80::Disassembler::Instruction
318             ->org($self->memory, $min);
319 7         70 print $fh $instr->asm;
320            
321 7         212 for (my $addr = $min; $addr <= $max; ) {
322             # block comments
323 15057         291466 print $fh $comment_it->($addr);
324            
325 15057         31241 $addr = $self->_write_instr($fh, $addr, $max);
326             }
327            
328 7         153 print $fh "\n";
329             }
330            
331             # final comments
332 9         39 print $fh $comment_it->();
333            
334 9 100       71 print $fh $self->footer if defined $self->footer;
335              
336 9         157 $self->_write_map($fh);
337 9         62 $self->_write_labels($fh);
338 9         163 $self->_write_check_calls($fh);
339             }
340              
341             #------------------------------------------------------------------------------
342             # iterator to return block comments up to given address
343             sub _block_comments_iter {
344 9     9   35 my($self) = @_;
345 9         31 my $i = 0;
346             return sub {
347 15066     15066   24517 my($addr) = @_;
348 15066         19404 my $max = $#{$self->_block_comments};
  15066         33733  
349 15066 100       145553 $addr = $max unless defined $addr;
350            
351 15066         22108 my $return = "";
352 15066   100     48921 while ($i <= $addr && $i <= $max) {
353 24562         44173 my $comment = $self->_block_comments->[$i++];
354 24562 100       264783 $return .= $comment if defined $comment;
355             }
356 15066         31314 $return;
357 9         97 };
358             }
359              
360             #------------------------------------------------------------------------------
361 6     6   62 use constant BPL => 16;
  6         15  
  6         29873  
362              
363             #------------------------------------------------------------------------------
364             # write the file header and the label equates
365             sub _write_header {
366 9     9   38 my($self, $fh) = @_;
367            
368 9         61 my $label_width = $self->labels->max_length + 1;
369            
370 9 100       307 print $fh $self->header if defined $self->header;
371            
372 9         203 my @labels = sort { $a->addr <=> $b->addr } $self->labels->search_all;
  14990         235445  
373 9         516 for my $label (@labels) {
374 2031 100       33954 next if defined $self->instr->[$label->addr]; # no need for EQU
375 120         2133 print $fh $label->equ_string($label_width);
376             }
377 9 100       143 print $fh "\n" if @labels;
378            
379             # create IX0 / IY0 base
380 9         32 my $printed_base;
381 9         59 for (['IX0', 'ix_base'], ['IY0', 'iy_base']) {
382 18         48 my($base, $func) = @$_;
383 18         98 my $addr = $self->$func;
384 18 100       219 if (defined $addr) {
385 2         15 my $label = $self->labels->search_addr($addr);
386 2 50       52 if (defined $label) {
387 2         9 $addr = $label->name;
388             }
389             else {
390 0         0 $addr = format_hex4($addr);
391             }
392            
393 2         38 print $fh sprintf("%-*s equ %s\n", $label_width-1, $base, $addr);
394 2         12 $printed_base++;
395             }
396             }
397              
398 9 100       125 print $fh "\n" if $printed_base;
399             }
400            
401             #------------------------------------------------------------------------------
402             # write one instruction
403             sub _write_instr {
404 15057     15057   26755 my($self, $fh, $addr, $max) = @_;
405            
406             # label
407 15057         30854 my $label = $self->labels->search_addr($addr);
408 15057 100       154124 print $fh "\n", $label->label_string if (defined $label);
409              
410 15057         29859 my $instr = $self->instr->[$addr];
411 15057 100       154710 if (defined $instr) {
412             # instruction
413 15018 100 100     34466 if (defined($instr->NN) && !defined($instr->format->{NN})) {
    100 66        
414             # nac the special case of 16-bit (defw) values which can
415             # nac potentially be converted to a label
416 1172 100       12611 if (ref($instr->NN)) {
417 215         1992 my $max = scalar(@{$instr->NN});
  215         412  
418 215         2155 for (my $i=0; $i<$max; $i++) {
419 219         449 my $NN = $instr->NN->[$i];
420 219         2138 my $ref_label = $self->labels->search_addr($NN);
421 219 100       2419 if (defined($ref_label)) {
422 216         597 $instr->NN->[$i] = $ref_label->name;
423             $instr->format->{NN} =
424 219     219   444 sub { my $foo=shift;
425 219 100       625 if (/^\d+$/) {return format_hex4($foo)}
  3         8  
426 216         1119 else {return $foo}
427 216         4859 };
428             }
429             }
430             }
431             else {
432 957         9160 my $NN = $instr->NN;
433 957         9121 my $ref_label = $self->labels->search_addr($NN);
434 957 100       9660 if (defined($ref_label)) {
435 646     646   3867 $instr->format->{NN} = sub { $ref_label->name };
  646         1679  
436             }
437             }
438             }
439             elsif (defined($instr->DIS) && !defined($instr->format->{DIS})) {
440 379         5061 for (['ix', 'ix_base'], ['iy', 'iy_base']) {
441 758         11045 my($reg, $func) = @$_;
442 758 100 100     1613 if ($instr->opcode =~ /$reg/ && defined(my $base = $self->$func)) {
443 292         8184 my $addr = $base + $instr->DIS;
444 292         2844 my $ref_label = $self->labels->search_addr($addr);
445 292 100       3017 if (defined $ref_label) {
446             $instr->format->{DIS} =
447 269     269   2095 sub { '+'.$ref_label->name.'-'.uc($reg).'0' };
  269         735  
448             }
449             }
450             }
451             }
452 15018         292996 print $fh $instr->asm;
453            
454 15018         232361 return $instr->next_addr;
455             }
456             else {
457             # block of defb
458              
459             # search for next defined instr
460 39         70 my $p;
461 39   100     149 for ($p = $addr; $p <= $max && ! defined($self->instr->[$p]) ; $p++) {
462             ;
463             }
464              
465 39         350349 my $comment = "unknown area ".format_hex4($addr)." to ".format_hex4($p-1);
466 39         163 print $fh "\n", " " x 8, "; Start of $comment\n";
467            
468             # print for $addr in blocks of 16
469 39         107 while ($addr < $p) {
470 2496         4516 my $max_count = $p - $addr;
471 2496         4377 my $count = BPL - ($addr % BPL); # until end of addr block
472 2496 100       4761 $count = $max_count if $count > $max_count; # until $p
473            
474 2496         6291 my $instr = CPU::Z80::Disassembler::Instruction
475             ->defb($self->memory, $addr, $count);
476 2496         7493 print $fh $instr->asm;
477 2496         44622 $addr += $count;
478             }
479              
480 39         124 print $fh " " x 8, "; End of $comment\n\n";
481            
482 39         173 return $addr;
483             }
484             }
485              
486             #------------------------------------------------------------------------------
487             sub _write_map {
488 9     9   32 my($self, $fh) = @_;
489            
490 9         39 my $it = $self->memory->loaded_iter;
491 9         38 while (my($min, $max) = $it->()) {
492 7         37 for my $addr ($min .. $max-1) {
493 65553 100 100     204772 if ($addr == $min || ($addr % 0x50) == 0) {
494 823         2404 print $fh "\n; ", format_hex4($addr), " ";
495             }
496 65553         123012 print $fh $self->get_type($addr);
497             }
498 7         73 print $fh "\n";
499             }
500             }
501              
502             #------------------------------------------------------------------------------
503             sub _write_labels {
504 9     9   30 my($self, $fh) = @_;
505            
506 9         44 my @labels = $self->labels->search_all;
507 9 100       470 return unless @labels;
508            
509 6         62 my $len = $self->labels->max_length;
510            
511 6         193 my @by_name = sort { lc($a->name) cmp lc($b->name) } @labels;
  6915         109018  
512 6         397 my @by_addr = sort { $a->addr <=> $b->addr } @labels;
  14990         234757  
513              
514 6         326 print $fh "\n; Labels\n;\n";
515 6         58 for (0 .. $#labels) {
516 2031         4265 print $fh "; ", format_hex4($by_addr[$_]->addr), " => ",
517             sprintf("%-${len}s", $by_addr[$_]->name),
518             " " x 8,
519             sprintf("%-${len}s", $by_name[$_]->name), " => ",
520             format_hex4($by_name[$_]->addr), "\n";
521             }
522             }
523              
524             #------------------------------------------------------------------------------
525             sub _write_check_calls {
526 9     9   32 my($self, $fh) = @_;
527              
528 9         22 my %unknown_calls;
529 9         23 for my $addr (keys %{$self->_can_call}) {
  9         130  
530 359 100       3863 $unknown_calls{$addr}++ unless $self->_can_call->{$addr};
531             }
532 9         160 for my $addr (keys %{$self->_call_instr}) {
  9         55  
533 10         159 my $instr = $self->_get_instr($addr);
534 10         140 $unknown_calls{$instr->NN}++;
535             }
536            
537 9 100       501 if (%unknown_calls) {
538             print $fh "\n\n; Check these calls manualy: ",
539 3         20 join(", ", sort map {format_hex4($_)} keys %unknown_calls),
  22         49  
540             "\n\n";
541             }
542             }
543              
544             #------------------------------------------------------------------------------
545             sub _opt_output_fh {
546 18     18   53 my($file) = @_;
547            
548             # open file
549 18         35 my $fh;
550 18 100       60 if (defined $file) {
551 15 50       1736 open($fh, ">", $file) or croak("write $file: $!");
552             }
553             else {
554 3         9 $fh = \*STDOUT;
555             }
556              
557 18         88 $fh;
558             }
559              
560             #------------------------------------------------------------------------------
561              
562             =head2 set_type_code, set_type_byte, set_type_word
563              
564             Sets the type of the given address. An optional count allows the definitions of
565             the type of consecutive memory locations.
566              
567             It is an error to set a type of a not-defined memory location,
568             or to redefine a type.
569              
570             =cut
571              
572             #------------------------------------------------------------------------------
573             sub _set_type {
574 17825     17825   39692 my($self, $type, $addr, $count) = @_;
575 17825   100     39311 $count ||= 1;
576            
577 17825 50       92871 croak("Invalid type $type") unless $type =~ /$TYPES_RE/;
578            
579 17825         43294 for ( ; $count > 0 ; $count--, $addr++ ) {
580 30586         181303 my $current_type = $self->get_type($addr);
581            
582 30585 100 100     81849 croak("Changing type of address ".format_hex4($addr)." from ".
      100        
583             "$current_type to $type")
584             if ($current_type ne TYPE_UNKNOWN &&
585             $type ne TYPE_UNKNOWN &&
586             $current_type ne $type);
587            
588 30584         70925 $self->_type->poke($addr, ord($type));
589             }
590             }
591 12203     12203 1 125141 sub set_type_code { shift->_set_type( TYPE_CODE, @_ ) }
592 4968     4968 1 50862 sub set_type_byte { shift->_set_type( TYPE_BYTE, @_ ) }
593 316     316 1 3379 sub set_type_word { shift->_set_type( TYPE_WORD, @_ ) }
594            
595             #------------------------------------------------------------------------------
596              
597             =head2 get_type
598              
599             Gets the type at the given address, one of TYPE_UNKNOWN, TYPE_CODE, TYPE_BYTE or
600             TYPE_WORD constants.
601              
602             It is an error to set a type of a not-defined memory location.
603              
604             =cut
605              
606             #------------------------------------------------------------------------------
607             sub get_type {
608 133093     133093 1 438428 my($self, $addr) = @_;
609            
610 133093 100       282807 croak("Getting type of unloaded memory at ".format_hex4($addr))
611             unless defined $self->memory->peek($addr);
612            
613 133092         1564014 my $current_type = $self->_type->peek($addr);
614 133092 100       1422189 $current_type = defined($current_type) ? chr($current_type) : TYPE_UNKNOWN;
615            
616 133092 50       581170 croak("Invalid type $current_type") unless $current_type =~ /$TYPES_RE/;
617            
618 133092         358314 return $current_type;
619             }
620              
621             #------------------------------------------------------------------------------
622              
623             =head2 set_call
624              
625             Declares a subroutine at the given address, either with no stack impact
626             (if 1 is passed as argument) or with a stack impact to be computed by the
627             given code reference. This function is called with $self and the address
628             after the call instruction as arguments and should return the next address(es)
629             where the code stream shall continue.
630              
631             =cut
632              
633             #------------------------------------------------------------------------------
634             sub set_call {
635 33     33 1 378 my($self, $addr, $can_call) = @_;
636 33         55 $self->_can_call->{$addr} = $can_call;
637             }
638              
639             #------------------------------------------------------------------------------
640              
641             =head2 code
642              
643             Declares the given address and all following instructions up to an unconditional
644             jump as a block of code, with an optional label.
645              
646             =cut
647              
648             #------------------------------------------------------------------------------
649             sub _get_instr {
650 157806     157806   283890 my($self, $addr) = @_;
651              
652             # read from cache or disassemble
653 157806   66     311022 $self->instr->[$addr] ||=
654             CPU::Z80::Disassembler::Instruction->disassemble($self->memory, $addr);
655             }
656            
657             sub code {
658 8593     8593 1 53835 my($self, $addr, $label) = @_;
659              
660 8593 100       19689 defined($label) and $self->labels->add($addr, $label);
661            
662 8593         16064 my @stack = ($addr); # all addresses to investigate
663            
664             # check calls
665 8593         20549 while (@stack) {
666             # follow all streams of code
667 8787         18038 while (@stack) {
668 23296         39876 my $addr = pop @stack;
669            
670             # if address is not loaded, assume a ROM entry point
671 23296 100       56135 if (!defined $self->memory->peek($addr)) {
672 2 50       26 if (!$self->labels->search_addr($addr)) {
673 0         0 my $instr = $self->labels->add($addr);
674             }
675 2         32 next;
676             }
677            
678             # skip if already checked
679 23294 100       292410 next if $self->get_type($addr) eq TYPE_CODE;
680            
681             # get instruction and mark as code
682 12200         28611 my $instr = $self->_get_instr($addr);
683 12200         40682 $self->set_type_code($addr, $instr->size);
684            
685             # create labels for branches (jump or call)
686 12200 100       152711 if ($instr->is_branch) {
687 3335         47703 my $branch_addr = $instr->NN;
688 3335         33474 my $label = $self->labels->add($branch_addr, undef, $addr);
689 3335     2510   19733 $instr->format->{NN} = sub { $label->name };
  2510         7228  
690             }
691            
692             # check call / rst addresses
693 12200 100       154343 if ($instr->is_call) {
694 1749         24517 my $call_addr = $instr->NN;
695 1749         17696 my $can_call = $self->_can_call->{$call_addr};
696 1749 100       20687 if (! defined $can_call) {
    100          
    100          
697 401         1072 $self->_call_instr->{$addr}++; # mark road block
698             }
699             elsif (ref $can_call) {
700 116         345 push @stack, $can_call->($self, $instr->next_addr);
701             # call sub to handle impact
702             }
703             elsif ($can_call) {
704 1156         3013 push @stack, $instr->next_addr; # can continue
705             }
706             }
707            
708             # continue on next addresses
709 12200         151324 push @stack, $instr->next_code;
710             }
711            
712             # check if we can unwind any blocked calls, after all paths without calls are
713             # exhausted
714 8787         21148 push @stack, $self->_check_call_instr;
715             }
716             }
717              
718             #------------------------------------------------------------------------------
719             sub _check_call_instr {
720 8787     8787   16272 my($self) = @_;
721              
722 8787         12078 my @stack;
723            
724             # check simple call instructions where we blocked
725 8787         12182 for my $addr (keys %{$self->_call_instr}) {
  8787         18615  
726 13640         64085 my $instr = $self->_get_instr($addr);
727 13640         155298 my $call_addr = $instr->NN;
728            
729 13640 100 66     129356 if ( # if any of the calls is conditional, then _can_call
730             $instr->opcode =~ /call \w+,NN/
731             ||
732             # if address after the call is CODE, then _can_call
733             $self->get_type($instr->next_addr) eq TYPE_CODE
734             ) {
735            
736             # mark for later; do not call code() directly because we are
737             # iterating over _call_instr that might be changed by code()
738 48         217 $self->_can_call->{$call_addr} = 1;
739 48         658 push @stack, $instr->next_addr; # code from here
740 48         939 delete $self->_call_instr->{$addr}; # processed
741             }
742             }
743            
744             # check remaining by following code flow
745 8787         57887 for my $addr (keys %{$self->_call_instr}) {
  8787         18527  
746 13592         61840 my $instr = $self->_get_instr($addr);
747 13592         152925 my $call_addr = $instr->NN;
748            
749             # if call flow in called subroutine
750             # does not pop return address, than _can_call
751 13592         126287 my $can_call = $self->_check_call($call_addr);
752 13592 100       35915 if (defined $can_call) {
753 338         786 $self->_can_call->{$call_addr} = $can_call;
754 338         3750 push @stack, $addr; # re-check call to call can_call
755 338         876 $self->_set_type(TYPE_UNKNOWN, $addr, $instr->size);
756             # allow recheck to happen
757 338         4382 delete $self->_call_instr->{$addr}; # processed
758             }
759             }
760            
761 8787         99312 return @stack;
762             }
763              
764             #------------------------------------------------------------------------------
765             sub _check_call {
766 13592     13592   23715 my($self, $call_addr) = @_;
767            
768 13592         19887 my %seen; # addresses we have checked
769 13592         21710 my($addr, $sp_level) = ($call_addr, 0);
770 13592         32276 my @stack = ([$addr, $sp_level]); # all addresses to investigate
771            
772             # follow code
773 13592         29170 while (@stack) {
774 133267         1890423 ($addr, $sp_level) = @{pop @stack};
  133267         242847  
775 133267 100       386589 next if $seen{$addr}++; # prevent loops
776            
777             # run into some known code
778 118425         257422 my $can_call = $self->_can_call->{$addr};
779 118425 100       1142195 if (defined $can_call) {
780 86 100       554 return $can_call if $sp_level == 0;
781             }
782              
783             # if address is not loaded, return "dont know"
784 118340 100       227356 if (!defined $self->memory->peek($addr)) {
785 1         25 return undef;
786             }
787              
788             # get the instruction
789 118339         1391417 my $instr = $self->_get_instr($addr);
790 118339         1223989 local $_ = $instr->opcode;
791            
792             # check stack impact
793 118339 100       1478829 if (/ret/) {
    100          
    100          
    50          
    50          
    100          
    100          
794 243 100       1756 return 1 if $sp_level == 0; # can call if stack empty
795             }
796             elsif (/push/) {
797 15352         26895 $sp_level += 2;
798             }
799             elsif (/pop/) {
800 2301         4176 $sp_level -= 2;
801 2301 100       5024 return 0 if $sp_level < 0; # STACK IMPACT!
802             }
803             elsif (/dec sp/) {
804 0         0 $sp_level++;
805             }
806             elsif (/inc sp/) {
807 0         0 $sp_level--;
808 0 0       0 return 0 if $sp_level < 0; # STACK IMPACT!
809             }
810             elsif (/ex \(sp\),/) {
811 25 100       105 return 0 if $sp_level < 2; # STACK IMPACT!
812             }
813             elsif (/ld sp/) {
814 1         18 return 0; # STACK IMPACT!
815             }
816            
817             # continue on next address, but dont follow calls
818 118086 100       271902 if ($instr->is_call) {
    100          
819 15426         200891 my $can_call = $self->_can_call->{$instr->NN};
820 15426 100 100     300610 if (defined($can_call) && !ref($can_call) && $can_call) {
      100        
821 2468         6877 push @stack, [$instr->next_addr, $sp_level]; # continue after call
822             }
823             }
824             elsif ($instr->is_branch) {
825 17412         221422 push @stack, [$instr->NN, $sp_level];
826             }
827            
828 118086 100       1212164 push @stack, [$instr->next_addr, $sp_level] unless $instr->is_break_flow;
829             }
830            
831 13253         170195 return undef; # don't know
832             }
833              
834             #------------------------------------------------------------------------------
835              
836             =head2 defb, defb2, defw, defm, defmz, defm7
837              
838             Declares the given address as a def* instruction
839             with an optional label.
840              
841             =cut
842              
843             #------------------------------------------------------------------------------
844             sub _def {
845 5283     5283   10215 my($self, $factory, $set_type,
846             $addr, $count, $label) = @_;
847              
848 5283 100       10799 defined($label) and $self->labels->add($addr, $label);
849            
850 5283         13848 my $instr = CPU::Z80::Disassembler::Instruction
851             ->$factory($self->memory, $addr, $count);
852 5283         16622 $self->instr->[$addr] = $instr;
853 5283         55458 $self->$set_type($addr, $instr->size);
854            
855 5283         64093 return $instr;
856             }
857              
858             sub defb {
859 4761     4761 1 32135 my($self, $addr, $count, $label) = @_;
860 4761         10705 $self->_def('defb', 'set_type_byte', $addr, $count, $label);
861             }
862              
863             sub defb2 {
864 0     0 1 0 my($self, $addr, $count, $label) = @_;
865 0         0 $self->_def('defb2', 'set_type_byte', $addr, $count, $label);
866             }
867              
868             sub defw {
869 316     316 1 1015 my($self, $addr, $count, $label) = @_;
870 316         836 $self->_def('defw', 'set_type_word', $addr, $count, $label);
871             }
872              
873             sub defm {
874 78     78 1 1079 my($self, $addr, $length, $label) = @_;
875 78         174 $self->_def('defm', 'set_type_byte', $addr, $length, $label);
876             }
877              
878             sub defmz {
879 0     0 1 0 my($self, $addr, $count, $label) = @_;
880 0         0 $self->_def('defmz', 'set_type_byte', $addr, $count, $label);
881             }
882              
883             sub defm7 {
884 128     128 1 2379 my($self, $addr, $count, $label) = @_;
885 128         282 $self->_def('defm7', 'set_type_byte', $addr, $count, $label);
886             }
887              
888             #------------------------------------------------------------------------------
889              
890             =head2 block_comment
891              
892             Creates a block comment to insert before the given address.
893              
894             =cut
895              
896             #------------------------------------------------------------------------------
897             sub block_comment {
898 12525     12525 1 244036 my($self, $addr, $block_comment) = @_;
899            
900 12525 100       26215 if (defined $block_comment) {
901 6060         8999 chomp($block_comment);
902 6060   100     11866 $self->_block_comments->[$addr] ||= "";
903 6060         70334 $self->_block_comments->[$addr] .= "$block_comment\n";
904             }
905             }
906              
907             #------------------------------------------------------------------------------
908              
909             =head2 line_comments
910              
911             Appends each of the given line comments to the instrutions starting at
912             the given address, one comment per instruction.
913              
914             =cut
915              
916             #------------------------------------------------------------------------------
917             sub line_comments {
918 6747     6747 1 15116 my($self, $addr, @line_comments) = @_;
919            
920 6747         13056 for (@line_comments) {
921 6747         17774 my $instr = $self->instr->[$addr];
922 6747 100       75901 croak("Cannot set comment of unknown instruction at ".format_hex4($addr))
923             unless $instr;
924 6746   100     14817 my $old_comment = $instr->comment // "";
925 6746 100       78778 $old_comment .= "\n" if $old_comment;
926 6746         22642 $instr->comment($old_comment . $_);
927 6746         80398 $addr += $instr->size;
928             }
929             }
930              
931             #------------------------------------------------------------------------------
932              
933             =head2 relative_arg
934              
935             Shows the instruction argument (NN or N) relative to a given label name.
936             Label name can be '$' for a value relative to the instruction pointer.
937              
938             =cut
939              
940             #------------------------------------------------------------------------------
941              
942             =head2 create_control_file
943              
944             $dis->create_control_file($ctl_file, $bin_file, $addr, $arch);
945              
946             Creates a new control file for the given input binary file, starting at the given address
947             and for the given architecture.
948              
949             The address defaults to zero, and the architecture to undefined. The architecture may be
950             implemented in the future, for example to define system variable equates for the given
951             architecture.
952              
953             It is an error to overwrite a control file.
954              
955             The Control File is the input file for a disassembly run in an interactive disassembly
956             session, and the outout is the .asm. After each run, the user studies the output
957             .asm file, and includes new commands in the control file to add information to the
958             .asm file on the next run.
959              
960             This function creates a template control file that contains just the hex dump of the
961             binary file and the decoded assembly instruction at each address, e.g.
962              
963             0000 :F
964             0000 D3FD out ($FD),a
965             0002 01FF7F ld bc,$7FFF
966             0005 C3CB03 jp $03CB
967              
968             The control file commands start with a ':' and refer to the hexadecimal address at the
969             start of the line.
970              
971             Some commands operate on a range of addresses and accept the inclusive range limits separated
972             by a single '-'.
973              
974             A line starting with a blank uses the same address as the previous command.
975              
976             A semicolon starts a comment in the control file.
977              
978             0000 :; define next address as 0x0000
979             : ; at the same address 0x0000
980             0000-001F :B ; define a range address of bytes
981              
982             The dump between the address and the ':' is ignored and is helpfull as a guide while adding
983             information to the control file.
984              
985             =head2 load_control_file
986              
987             $dis->load_control_file($ctl_file);
988              
989             Load the control file created by and subsequently edited by the user
990             and create a new .asm disassembly file.
991              
992             =head1 Control File commands
993              
994             =head2 Include
995              
996             Include another control file at the current location.
997              
998             #include vars.ctl
999              
1000             =head2 File
1001              
1002             Load a binary file at the given address.
1003              
1004             0000 :F zx81.rom
1005              
1006             =head2 Code
1007              
1008             Define the start of a code routine, with an optional label. The code is not known to be
1009             stack-safe, i.e. not to have data bytes following the call instruction. The disassembler
1010             stops disassembly when it cannot determine if the bytes after a call instruction are
1011             data or code.
1012              
1013             0000 :C START
1014              
1015             =head2 Procedure
1016              
1017             Define the start of a procedure with a possible list of arguments following the call
1018             instruction.
1019              
1020             The signature is a list of {'B','W','C'}+, identifing each of the following items
1021             after the call instruction (Byte, Word or Code). In the following example the call
1022             istruction is followed by one byte and one word, and the procedure returns
1023             to the address after the word.
1024              
1025             0000 P proc B,W,C
1026              
1027             The signature defaults to a single 'C', meaning the procedure returns to the point after call.
1028              
1029             A signature without a 'C' means that the call never returns.
1030              
1031             =head2 Bytes and Words
1032              
1033             Define data bytes and words in the given address range.
1034              
1035             0000-0003 :B label
1036             0000-0003 :B label
1037             0000-0003 :B2[1] label ; one byte per line, binary data
1038             0000-0003 :W label
1039              
1040             =head2 Define a symbol
1041              
1042             Define the name of a symbol.
1043              
1044             4000 := ERR_NO comment\nline 2 of comment
1045              
1046             =head2 IX and IY base
1047              
1048             Define base address for IX and IY indexed mode.
1049              
1050             4000 :IX
1051             4000 :IY
1052              
1053             =head2 Header block
1054              
1055             Define a text block to be output before the given address. The block is inserted vervbatin,
1056             so include ';' if a comment is intended.
1057              
1058             0000 :# ; header
1059             :# ; continuation
1060             :# abc EQU 23
1061              
1062             =head2 Line comment
1063              
1064             Define a line comment to show at the given address.
1065              
1066             0000 :; comment
1067              
1068             =head2 Header and Footer
1069              
1070             Define a text block to be output at the top and the bottom of the assembly file.
1071             The block is inserted vervbatin, so include ';' if a comment is intended.
1072              
1073             0000 :< ; header
1074             :< ; continuation
1075             :> ; footer
1076              
1077             =cut
1078              
1079             #------------------------------------------------------------------------------
1080              
1081             sub _find_file {
1082 5     5   119 my($self, $from_file, $include_file) = @_;
1083            
1084 5 100       137 return $include_file if -f $include_file;
1085            
1086             # test relative to parent
1087 1         11 my $relative = path(path($from_file)->parent, path($include_file)->basename);
1088 1 50       271 return $relative if -f $relative;
1089            
1090 0         0 return $from_file;
1091             }
1092              
1093             #------------------------------------------------------------------------------
1094              
1095             sub create_control_file {
1096 5     5 1 27655993 my($class, $ctl_file, $bin_file, $addr, $arch) = @_;
1097            
1098 5 100       137 -f $ctl_file and die "Error: $ctl_file exists\n";
1099            
1100 4         34 my $dis = $class->new;
1101 4         31 $dis->memory->load_file($bin_file, $addr);
1102 4         77 $dis->write_dump($ctl_file);
1103 4         78 my @lines = ( <
1104             ;------------------------------------------------------------------------------
1105             ; CPU::Z80::Disassembler control file
1106             ;------------------------------------------------------------------------------
1107              
1108             END
1109             sprintf("%04X :F $bin_file\n\n", $addr),
1110             path($ctl_file)->lines
1111             );
1112 4         20495 path($ctl_file)->spew(@lines);
1113             }
1114              
1115             #------------------------------------------------------------------------------
1116              
1117             sub load_control_file {
1118 5     5 1 449 my($self, $file) = @_;
1119            
1120 5         20 my $addr = 0; my $end_addr = 0;
  5         15  
1121 5 50       258 open(my $fh, $file) or die "cannot open $file\n";
1122 5         335 while (<$fh>) {
1123 41525         190709 chomp;
1124 41525         81869 s/^\s*;.*$//; # remove comments
1125 41525         90863 s/\s+$//;
1126 41525 100       108476 next unless /\S/;
1127              
1128 36285 100       69741 if (/^ \#include \s+ (\S+) /ix) {
1129 1         4 $self->load_control_file($self->_find_file($file, $1));
1130             }
1131             else {
1132             # decode start address
1133 36284 100       111082 if (s/^ ([0-9a-f]+) //ix) {
1134 26213         56924 $addr = hex($1);
1135             }
1136              
1137             # decode end address
1138 36284         53894 $end_addr = $addr;
1139 36284 100       70907 if (s/^ -([0-9a-f]+) //ix) {
1140 1463         2819 $end_addr = hex($1);
1141             }
1142              
1143             # remove all chars up to ':', ignore lines without ':'
1144 36284 100       111081 /:\s*/ or next;
1145 20549         51128 $_ = $';
1146 20549 50       47284 next unless /\S/;
1147            
1148             # decode command
1149 20549         32228 my($include_file, $label, $comment, $signature, $type);
1150            
1151             # File
1152 20549 100       132453 if (($include_file) = /^ F \s+ (\S+) /ix) {
    100          
    100          
    100          
    100          
    50          
    100          
    50          
    100          
    50          
    50          
1153 4         29 $self->memory->load_file($self->_find_file($file, $include_file), $addr);
1154             }
1155            
1156             # Code
1157             elsif (($label) = /^ C \s* (\w+)? /ix) {
1158 7506         19662 $self->code($addr, $label);
1159             }
1160            
1161             # Define label
1162             elsif (($label, $comment) = /^ = \s+ (\S+) \s* ;? \s*(.*)/ix) {
1163 40         73 $comment =~ s/ \\ n /\n/gx;
1164 40         98 my $instr = $self->labels->add($addr, $label);
1165 40 50       104 $instr->comment($comment) if $comment;
1166             }
1167            
1168             # Block comment
1169             elsif (($comment) = /^ \# \s? (.*)/ix) {
1170 3270         7174 $self->block_comment($addr, $comment);
1171             }
1172            
1173             # Header
1174             elsif (($comment) = /^ \< \s? (.*)/ix) {
1175 56   100     149 my $header = $self->header // "";
1176 56 100       632 $header .= "\n" if $header;
1177 56         228 $self->header($header.$comment);
1178             }
1179            
1180             # Footer
1181             elsif (($comment) = /^ \> \s? (.*)/ix) {
1182 0   0     0 my $footer = $self->footer // "";
1183 0 0       0 $footer .= "\n" if $footer;
1184 0         0 $self->footer($footer.$comment);
1185             }
1186            
1187             # Line comment
1188             elsif (($comment) = /^ \; [\s;]* (.*)/ix) {
1189 6746         16700 $self->line_comments($addr, $comment);
1190             }
1191            
1192             # Procedure
1193             elsif (($label, $signature) = /^ P \s+ (\w+) \s* (.*)/ix) {
1194 0         0 $self->code($addr, $label);
1195 0         0 $signature =~ s/,/ /g;
1196 0         0 my @types = split(' ', $signature);
1197 0 0       0 @types = ('C') if !@types;
1198             $self->set_call($addr, sub {
1199 0     0   0 my($self, $addr) = @_;
1200 0         0 for (@types) {
1201 0 0       0 if ($_ eq 'B') {
    0          
    0          
1202 0         0 $self->defb($addr);
1203 0         0 $addr++
1204             }
1205             elsif ($_ eq 'W') {
1206 0         0 $self->defW($addr);
1207 0         0 $addr += 2;
1208             }
1209             elsif ($_ eq 'C') {
1210 0         0 return $addr;
1211             }
1212             else {
1213 0         0 die "procedure argument type $_ unknown";
1214             }
1215             }
1216 0         0 return;
1217 0         0 });
1218             }
1219            
1220             # Byte | Word
1221             elsif (my($type, $ipl, $label) = /^ (B2 | B | W | M) (?: \[ (\d+) \] )? \s* (\w+)?/ix) {
1222 2926 100       6819 $self->labels->add($addr, $label) if defined $label;
1223 2926 50       5503 $ipl = 16 unless $ipl;
1224              
1225 2926         4370 my($func, $size);
1226 2926 100       5395 if ($type eq 'B') { ($func, $size) = ('defb', 1); }
  2724 50       4578  
    50          
    0          
1227 0         0 elsif ($type eq 'B2') { ($func, $size) = ('defb2', 1); }
1228 202         392 elsif ($type eq 'W') { ($func, $size) = ('defw', 2); }
1229 0         0 elsif ($type eq 'M') { ($func, $size) = ('defm', 1); $ipl = 32; }
  0         0  
1230 0         0 else { die "type $type unknown"; }
1231            
1232 2926 100 100     6483 if ($size == 2 && $addr == $end_addr) {
1233 101         146 $end_addr++; # a word uses two addresses
1234             }
1235            
1236 2926         6375 for (my $a = $addr; $a <= $end_addr; ) {
1237 2926         6882 my $items = int(($end_addr - $a + 1) / $size);
1238 2926 50       5812 $items = $ipl if $items > $ipl;
1239            
1240 2926         9160 $self->$func($a, $items);
1241 2926         17414 $a += $size * $items;
1242             }
1243             }
1244            
1245             # IX
1246             elsif (/^ IX /ix) {
1247 0         0 $self->ix_base($addr);
1248             }
1249            
1250             # IY
1251             elsif (/^ IY /ix) {
1252 1         5 $self->iy_base($addr);
1253             }
1254            
1255             # undefined
1256             else {
1257 0         0 die "Load '$file': cannot parse '$_'";
1258             }
1259             }
1260             }
1261             }
1262            
1263             #------------------------------------------------------------------------------
1264             sub relative_arg {
1265 25     25 1 435 my($self, $addr, $label_name) = @_;
1266              
1267             # disassemble from here, if needed
1268 25         80 $self->code($addr);
1269 25 50       393 my $instr = $self->_get_instr($addr) or die;
1270            
1271 25         317 my $label_addr;
1272 25 100       63 if ($label_name eq '$') {
1273 3         8 $label_addr = $instr->addr;
1274             }
1275             else {
1276 22 100       51 my $label = $self->labels->search_name($label_name)
1277             or croak("Label '$label_name' not found");
1278 21         327 $label_addr = $label->addr;
1279             }
1280            
1281 24 100       264 my $NN = defined($instr->NN) ? 'NN' :
    100          
1282             defined($instr->N ) ? 'N' :
1283             croak("Instruction at address ".format_hex4($addr).
1284             " has no arguments");
1285 23         311 my $arg = $instr->$NN;
1286 23 50       238 $arg = [$arg] unless ref $arg; # defb stores as [N]
1287            
1288 23         49 my $delta = $arg->[0] - $label_addr;
1289 23         83 my $expr = $label_name . format_dis($delta);
1290 23     23   153 $instr->format->{$NN} = sub { $expr };
  23         202  
1291             }
1292              
1293             #------------------------------------------------------------------------------
1294              
1295             =head1 ACKNOWLEDGEMENTS
1296              
1297             =head1 AUTHOR
1298              
1299             Paulo Custodio, C<< >>
1300              
1301             =head1 BUGS and FEEDBACK
1302              
1303             Please report any bugs or feature requests through
1304             the web interface at
1305             L.
1306              
1307             =head1 LICENSE AND COPYRIGHT
1308              
1309             Copyright 2010 Paulo Custodio.
1310              
1311             This program is free software; you can redistribute it and/or modify it
1312             under the terms of either: the GNU General Public License as published
1313             by the Free Software Foundation; or the Artistic License.
1314              
1315             See http://dev.perl.org/licenses/ for more information.
1316              
1317             The Spectrum 48K ROM used in the test scripts is Copyright by Amstrad.
1318             Amstrad have kindly given their permission for the
1319             redistribution of their copyrighted material but retain that copyright
1320             (see L).
1321              
1322             =cut
1323              
1324             1;