File Coverage

blib/lib/CPU/Z80/Disassembler/Instruction.pm
Criterion Covered Total %
statement 129 131 98.4
branch 56 62 90.3
condition 13 14 92.8
subroutine 27 28 96.4
pod 18 18 100.0
total 243 253 96.0


line stmt bran cond sub pod time code
1             package CPU::Z80::Disassembler::Instruction;
2              
3             #------------------------------------------------------------------------------
4              
5             =head1 NAME
6              
7             CPU::Z80::Disassembler::Instruction - One Z80 disassembled instruction
8              
9             =cut
10              
11             #------------------------------------------------------------------------------
12              
13 7     7   968 use strict;
  7         16  
  7         210  
14 7     7   34 use warnings;
  7         17  
  7         213  
15              
16 7     7   25269 use Asm::Z80::Table;
  7         364700  
  7         443  
17 7     7   949 use CPU::Z80::Disassembler::Memory;
  7         14  
  7         108  
18 7     7   279 use CPU::Z80::Disassembler::Format;
  7         21  
  7         945  
19              
20             our $VERSION = '1.00';
21              
22             #------------------------------------------------------------------------------
23              
24             =head1 SYNOPSIS
25              
26             use CPU::Z80::Disassembler::Instruction;
27             $instr = CPU::Z80::Disassembler::Instruction->disassemble(
28             $memory, $addr, $limit_addr);
29             $instr = CPU::Z80::Disassembler::Instruction->defb($memory, $addr, $count);
30             $instr = CPU::Z80::Disassembler::Instruction->defb2($memory, $addr, $count);
31             $instr = CPU::Z80::Disassembler::Instruction->defw($memory, $addr, $count);
32             $instr = CPU::Z80::Disassembler::Instruction->defm($memory, $addr, $length);
33             $instr = CPU::Z80::Disassembler::Instruction->defmz($memory, $addr);
34             $instr = CPU::Z80::Disassembler::Instruction->defm7($memory, $addr);
35             $instr = CPU::Z80::Disassembler::Instruction->org($memory, $addr);
36            
37             $instr->addr; $instr->next_addr;
38             $instr->bytes; $instr->opcode; $instr->N; $instr->NN; $instr->DIS; $instr->STR;
39             $instr->comment;
40             print $instr->dump;
41             print $instr->asm;
42             print $instr->as_string, "\n";
43              
44             =head1 DESCRIPTION
45              
46             This module represents one disassembled instruction. The object is
47             constructed by one of the factory methods, and has attributes to ease the
48             interpretation of the instruction.
49              
50             =head1 CONSTRUCTORS
51              
52             =head2 disassemble
53              
54             Factory method to create a new object by disassembling the given
55             L object
56             at the given address.
57              
58             The C<$limit_addr> argument, if defined, tells the disassembler to select
59             the longest possible instruction, that does not use the byte at C<$limit_add>.
60             The default is to select the shortest possible instruction.
61              
62             For example, the sequence of bytes C<62 6B> is decoded as C if
63             C<$limit_addr> is undef.
64              
65             If C<$limit_addr> is defined with any value different from C<$addr + 1>, where
66             the second byte is stored, then the same sequence of bytes is decoded as
67             C.
68              
69             To decode standard Z80 instructions, do not pass the C<$limit_addr> argument.
70              
71             To decode extended Z80 instructions, pass the address of the next label after
72             C<$addr>, or 0x10000 to get always the longest instruction.
73              
74             If the instruction at the given address is an invalid opcode, or if there
75             are no loaded bytes at the given address, the instrution object is not
76             constructed and the factory returns C.
77              
78             =head2 defb
79              
80             Factory method to create a new object by disassembling a C instruction
81             at the given address, reading one or C<$count> byte(s) from memory.
82              
83             =head2 defb2
84              
85             Same as defb but shows binary data.
86              
87             =head2 defw
88              
89             Factory method to create a new object by disassembling a C instruction
90             at the given address, reading one or C<$count> word(s) from memory.
91              
92             =head2 defm
93              
94             Factory method to create a new object by disassembling a C instruction
95             at the given address, reading C<$length> character(s) from memory.
96              
97             =head2 defmz
98              
99             Factory method to create a new object by disassembling a C instruction
100             at the given address, reading character(s) from memory until a zero terminator
101             is found.
102              
103             =head2 defm7
104              
105             Factory method to create a new object by disassembling a C instruction
106             at the given address, reading character(s) from memory until a character
107             with bit 7 set is found.
108              
109             =head2 org
110              
111             Factory method to create a new ORG instruction.
112              
113             =cut
114              
115             #------------------------------------------------------------------------------
116              
117             =head1 ATTRIBUTES
118              
119             =head2 memory
120              
121             Point to the memory object from where this instruction was disassembled.
122              
123             =head2 addr
124              
125             Address of the instruction.
126              
127             =head2 size
128              
129             Size of the instruction, in bytes.
130              
131             =head2 next_addr
132              
133             Returns the address that follows this instruction.
134              
135             =head2 next_code
136              
137             Returns the list with the next possible addresses where the code flow can continue.
138              
139             For an instruction that does not branch, this is the same as C.
140              
141             For a decision-branch instruction, these are the C and the C.
142              
143             For an instruction that breaks the flow (e.g. C), this is an empty list.
144              
145             A C or C instruction is considered as breaking the flow, because
146             the called routine might manipulate the return pointer in the stack, and the
147             bytes after the C or C instruction can be data bytes.
148              
149             =head2 bytes
150              
151             Reference to a list of the instruction bytes. The bytes are retrieved
152             from the L
153             object.
154              
155             =head2 opcode
156              
157             Canonical assembly instruction, e.g. 'ld a,(NN)'.
158             The possible argument types are N, NN, DIS and STR.
159             There is one method to get/set each of the argument types.
160              
161             =head2 N
162              
163             8-bit data used by the instruction.
164              
165             =head2 N2
166              
167             8-bit data used by the instruction, to be shown in base 2.
168              
169             =head2 NN
170              
171             16-bit data used by the instruction.
172              
173             =head2 DIS
174              
175             Offset for index register.
176              
177             =head2 STR
178              
179             String for defm* instructions.
180              
181             =head2 comment
182              
183             Comment to be written after a '; ' at the end of the line.
184              
185             =head2 format
186              
187             Returs the hash of special formating functions for each type of argument. These
188             functions, if defined, are called instead of the ones in the
189             L module to format
190             each type of argument.
191              
192             For example, to format the 8-bit argument of an instruction as decimal:
193              
194             $instr->format->{N} = sub { my $v = shift; return "$v" };
195              
196             =cut
197              
198             #------------------------------------------------------------------------------
199              
200             =head1 PREDICATES
201              
202             =head2 is_code
203              
204             Return TRUE if the instruction is a Z80 assembly opcode, FALSE if it is one
205             of the data definition or org instructions.
206              
207             =head2 is_call
208              
209             Return TRUE if the instruction is a call instruction, i.e. C or C.
210              
211             =head2 is_branch
212              
213             Return TRUE if the instruction may branch to another address, the address is
214             stored in the C attribute. This is either a jump or a call instruction.
215              
216             =head2 is_break_flow
217              
218             Return TRUE if the instruction breaks the flow at this point and jumps to some
219             other part of the code. A call instruction is considered as breaking the flow,
220             see C above.
221              
222             =cut
223              
224             #------------------------------------------------------------------------------
225 7     7   97 use base 'Class::Accessor';
  7         16  
  7         11870  
226             __PACKAGE__->mk_accessors(
227             'memory', # point to whole memory
228             'addr', # start address
229             'size', # number of bytes of instruction
230             'opcode', # canonical opcode, e.g. 'ld a,(NN)'
231             'N', # 8-bit data
232             'N2', # 8-bit data in binary
233             'NN', # 16-bit data
234             'DIS', # offset for index
235             'STR', # unquoted string for defm*
236             'comment', # comment after instruction
237             '_format', # hash of (N, NN, DIS, STR) => custom function to
238             # format each type of argument
239             'is_code', # true for a Z80 assembly instruction,
240             # false for def*, org, ...
241             );
242              
243             #------------------------------------------------------------------------------
244             sub format {
245 19953     19953 1 180376 my($self) = @_;
246 19953 100       39468 $self->_format({}) unless $self->_format;
247 19953         265788 $self->_format;
248             }
249              
250             #------------------------------------------------------------------------------
251             my %default_format = (
252             N => \&format_hex2,
253             N2 => \&format_bin8,
254             NN => \&format_hex4,
255             DIS => \&format_dis,
256             STR => \&format_str,
257             );
258              
259             #------------------------------------------------------------------------------
260             sub next_addr {
261 243317     243317 1 2937959 my($self) = @_;
262 243317         496167 $self->addr + $self->size;
263             }
264              
265             #------------------------------------------------------------------------------
266             sub next_code {
267 12239     12239 1 29762 my($self) = @_;
268 12239         18971 my @ret;
269 12239 100       23039 push @ret, $self->NN if $self->is_branch;
270 12239 100       173167 push @ret, $self->next_addr unless $self->is_break_flow;
271 12239         213735 @ret;
272             }
273              
274             #------------------------------------------------------------------------------
275             sub bytes {
276 45831     45831 1 78374 my($self) = @_;
277 45831         69419 my @bytes;
278 45831         88597 for my $addr ($self->addr .. $self->next_addr - 1) {
279 72282         1110176 push @bytes, $self->memory->peek($addr);
280             }
281 45831         563193 \@bytes;
282             }
283              
284             #------------------------------------------------------------------------------
285             # predicates
286 132413     132413 1 280190 sub is_call { shift->opcode =~ /call|rst/ }
287 127138     127138 1 1299645 sub is_branch { shift->opcode =~ /jp .*NN|jr|djnz|call|rst/ }
288 147924     147924 1 331384 sub is_break_flow { shift->opcode =~ /ret$|reti|retn|call NN|rst|jr NN|jp NN|jp \(\w+\)|org/ }
289              
290             #------------------------------------------------------------------------------
291             sub disassemble {
292 64250     64250 1 1049134 my($class, $memory, $addr, $limit_addr) = @_;
293              
294 64250         217392 my $self = bless { memory => $memory,
295             addr => $addr,
296             is_code => 1,
297             }, $class;
298              
299             # save bytes of all decoded instructions
300 64250         106655 my @found; # other instructions found
301            
302 64250         166552 my $table = Asm::Z80::Table->disasm_table;
303 64250   100     262666 for ( ;
304             # exit if second instruction goes over limit, e.g. label
305             ! (defined($limit_addr) && @found && $addr == $limit_addr) ;
306             $addr++
307             ) {
308             # fetch
309 100040         240049 my $byte = $memory->peek($addr);
310 100040 100       1151727 last unless defined $byte; # unloaded memory
311            
312             # lookup in table
313 100038 100       421517 if (exists $table->{N}) {
    100          
    100          
    100          
    100          
    100          
314 5871 50       15214 die if defined $self->N;
315 5871         62217 $self->N( $byte );
316 5871         63707 $table = $table->{N};
317             }
318             elsif (exists $table->{NNl}) {
319 10386 50       26081 die if defined $self->NN;
320 10386         114071 $self->NN( $memory->peek16u($addr++) );
321 10386         117812 $table = $table->{NNl}{NNh};
322             }
323             elsif (exists $table->{NNo}) {
324 6622 50       16361 die if defined $self->NN;
325 6622         72215 $self->NN( $addr + 1 + $memory->peek8s($addr) );
326 6622         72014 $table = $table->{NNo};
327             }
328             elsif (exists $table->{DIS}) {
329 1709 50       4626 die if defined $self->DIS;
330 1709         18724 $self->DIS( $memory->peek8s($addr) );
331 1709         19052 $table = $table->{DIS};
332             }
333             elsif (exists $table->{'DIS+1'}) {
334 21 50       69 die unless defined $self->DIS;
335 21 50       281 if ( $self->DIS + 1 != $memory->peek8s($addr) ) {
336 0         0 last; # abort search
337             }
338 21         61 $table = $table->{'DIS+1'};
339             }
340             elsif (! exists $table->{$byte}) {
341 5635         9666 last; # abort search
342             }
343             else {
344 69794         131733 $table = $table->{$byte};
345             }
346            
347             # check for end
348 94403 100       247849 if (exists $table->{''}) { # possible finish
349 64444         101618 push @found, [ [@{$table->{''}}], $addr + 1 ];
  64444         252646  
350             # save this instance, copy
351 64444 100       182811 last unless defined $limit_addr; # no limit -> shortest instr
352            
353             # continue for composite instruction
354             }
355             }
356            
357             # return undef if no instrution found
358 64250 100       131773 return undef unless @found;
359            
360             # collect last complete instruction found
361 64236         100680 my($opcode, @args) = @{$found[-1][0]};
  64236         170929  
362 64236 100       201732 $opcode .= ' '.join('', @args) if @args;
363 64236         229518 $opcode =~ s/,\s*/, /g;
364            
365 64236         209102 $self->opcode($opcode);
366 64236         751634 $self->size($found[-1][1] - $self->addr);
367            
368             # special case: rst -> show address in hex
369 64236 100       1196381 if ($opcode =~ /rst (\d+)/) {
370 5590         17710 $self->N($1); # set N for display
371 5590         65947 $self->NN($1); # set NN for analysis
372 5590         57392 $self->opcode('rst N');
373             }
374            
375 64236         329265 $self;
376             }
377              
378             #------------------------------------------------------------------------------
379             sub _def_value {
380 7596     7596   18392 my($class, $peek, $size, $def, $N,
381             $memory, $addr, $count) = @_;
382            
383 7596   100     19788 $count ||= 1;
384 7596         14519 my $values = [];
385 7596         18971 for my $i (0 .. $count - 1) {
386 45928         119489 my $value = $memory->$peek($addr + $size * $i); # read values
387 45928 100       497254 return undef unless defined $value; # unloaded memory
388            
389 45923         93771 $values->[$i] = $value;
390             }
391            
392 7591         47874 return bless { memory => $memory,
393             addr => $addr,
394             size => $size * $count,
395             opcode => "$def $N",
396             $N => $values,
397             }, $class;
398             }
399              
400             #------------------------------------------------------------------------------
401 7274     7274 1 84276 sub defb { shift->_def_value('peek8u', 1, 'defb', 'N', @_) }
402 0     0 1 0 sub defb2 { shift->_def_value('peek8u', 1, 'defb', 'N2', @_) }
403 322     322 1 3938 sub defw { shift->_def_value('peek16u', 2, 'defw', 'NN', @_) }
404              
405             #------------------------------------------------------------------------------
406             sub _def_str {
407 216     216   504 my($class, $peek, $eos_length, $def,
408             $memory, $addr, $length) = @_;
409            
410 216         654 my $str = $memory->$peek($addr, $length);
411 216 100       572 return undef unless defined $str; # unloaded memory
412            
413 210         1665 return $class->new({memory => $memory,
414             addr => $addr,
415             size => length($str) + $eos_length,
416             opcode => "$def STR",
417             STR => $str});
418             }
419              
420             #------------------------------------------------------------------------------
421 82     82 1 908 sub defm { shift->_def_str('peek_str', 0, 'defm', @_) }
422 3     3 1 12 sub defmz { shift->_def_str('peek_strz', 1, 'defmz', @_) }
423 131     131 1 1445 sub defm7 { shift->_def_str('peek_str7', 0, 'defm7', @_) }
424              
425             #------------------------------------------------------------------------------
426             sub org {
427 8     8 1 172 my($class, $memory, $addr) = @_;
428              
429 8         99 return bless { memory => $memory,
430             addr => $addr,
431             size => 0,
432             opcode => "org NN",
433             NN => $addr,
434             }, $class;
435             }
436              
437             #------------------------------------------------------------------------------
438              
439             =head1 FUNCTIONS
440              
441             =head2 as_string
442              
443             Returns the disassembled instruction opcode and arguments.
444              
445             =cut
446              
447             #------------------------------------------------------------------------------
448             # Format of the disassembled output
449             # 1 2 3 4 5 6 7
450             # 0123456789012345678901234567890123456789012345678901234567890123456789012
451             # # # # # # # # # # #
452             # AAAA H1H2H3H4H5 INSTR ; COMMENT
453             #
454             sub as_string {
455 63426     63426 1 143122 my($self) = @_;
456              
457             # decode opcode
458 63426         133647 my $opcode = $self->opcode;
459 63426         816726 $opcode =~ s{\b ( N | N2 | NN | \+(DIS) | STR ) \b
460             }{
461 33014   66     157257 $self->_format_arg($2 || $1)
462             }gex;
463            
464 63426         203426 my $comment = $self->comment;
465            
466 63426 100       628659 if (defined $comment) {
467 12130         26760 $comment =~ s/\n/ "\n" . " " x 32 . "; " /ge; # multi-line comment
  3251         8606  
468             }
469            
470 63426 100       220352 return !defined($comment) ?
    100          
471             $opcode :
472             length($opcode) >= 24 ?
473             $opcode . "\n" . " " x 32 . "; " . $comment :
474             sprintf("%-24s; %s", $opcode, $comment);
475             }
476              
477             sub _format_arg {
478 33014     33014   93244 my($self, $arg) = @_;
479              
480             my $ffunc = ( $self->_format && $self->format->{$arg} ?
481             $self->format->{$arg} :
482 33014 100 100     87205 $default_format{$arg}
483             );
484 33014         376471 my $value = $self->$arg;
485 33014 100       343685 $value = [$value] unless ref($value);
486            
487 33014         71454 return join(", ", map {$ffunc->($_)} @$value)
  71366         160942  
488             }
489              
490             #------------------------------------------------------------------------------
491              
492             =head2 dump
493              
494             Returns the disassembly dump ready to print, containing address, bytes and
495             instruction, followed by newline.
496              
497             =cut
498              
499             #------------------------------------------------------------------------------
500 7     7   76 use constant BPL => 5;
  7         18  
  7         2695  
501              
502             sub dump {
503 45792     45792 1 205864 my($self) = @_;
504              
505             # address
506 45792         96879 my $ret = sprintf("%04X ", $self->addr);
507            
508             # bytes
509 45792         508858 my $bytes = '';
510 45792         69289 for (@{$self->bytes}) {
  45792         97009  
511 72104         177510 $bytes .= sprintf("%02X", $_);
512             }
513            
514             # first line of bytes
515 45792         198763 $ret .= sprintf("%-*s ", BPL*2, substr($bytes, 0, BPL*2));
516 45792 100       126913 $bytes = length($bytes) < BPL*2 ? '' : substr($bytes, BPL*2);
517            
518             # opcode
519 45792         109110 $ret .= $self->as_string . "\n";
520            
521             # next lines of bytes
522 45792         119714 while ($bytes ne '') {
523 20         81 $ret .= " " x 5 . sprintf("%-*s \n", BPL*2, substr($bytes, 0, BPL*2));
524 20 100       69 $bytes = length($bytes) < BPL*2 ? '' : substr($bytes, BPL*2);
525             }
526            
527 45792         353657 $ret;
528             }
529              
530             #------------------------------------------------------------------------------
531              
532             =head2 asm
533              
534             Returns the disassembly asm line ready to print, containing
535             instruction and comments, followed by newline.
536              
537             =cut
538              
539             #------------------------------------------------------------------------------
540             sub asm {
541 17560     17560 1 31927 my($self) = @_;
542            
543 17560 100 100     35749 sprintf("%-7s %s\n%s", '',
544             $self->as_string,
545             ($self->is_break_flow && ! $self->is_call) ? "\n" : "");
546             }
547              
548             #------------------------------------------------------------------------------
549              
550             =head1 AUTHOR, BUGS, FEEDBACK, LICENSE AND COPYRIGHT
551              
552             See L.
553              
554             =cut
555              
556             #------------------------------------------------------------------------------
557              
558             1;