File Coverage

blib/lib/CPU/Z80/Disassembler/Instruction.pm
Criterion Covered Total %
statement 129 131 98.4
branch 56 62 90.3
condition 13 14 92.8
subroutine 27 28 96.4
pod 18 18 100.0
total 243 253 96.0


line stmt bran cond sub pod time code
1             package CPU::Z80::Disassembler::Instruction;
2              
3             #------------------------------------------------------------------------------
4              
5             =head1 NAME
6              
7             CPU::Z80::Disassembler::Instruction - One Z80 disassembled instruction
8              
9             =cut
10              
11             #------------------------------------------------------------------------------
12              
13 7     7   760 use strict;
  7         12  
  7         178  
14 7     7   38 use warnings;
  7         14  
  7         154  
15              
16 7     7   19465 use Asm::Z80::Table;
  7         298762  
  7         369  
17 7     7   664 use CPU::Z80::Disassembler::Memory;
  7         20  
  7         128  
18 7     7   217 use CPU::Z80::Disassembler::Format;
  7         12  
  7         750  
19              
20             our $VERSION = '1.02';
21              
22             #------------------------------------------------------------------------------
23              
24             =head1 SYNOPSIS
25              
26             use CPU::Z80::Disassembler::Instruction;
27             $instr = CPU::Z80::Disassembler::Instruction->disassemble(
28             $memory, $addr, $limit_addr);
29             $instr = CPU::Z80::Disassembler::Instruction->defb($memory, $addr, $count);
30             $instr = CPU::Z80::Disassembler::Instruction->defb2($memory, $addr, $count);
31             $instr = CPU::Z80::Disassembler::Instruction->defw($memory, $addr, $count);
32             $instr = CPU::Z80::Disassembler::Instruction->defm($memory, $addr, $length);
33             $instr = CPU::Z80::Disassembler::Instruction->defmz($memory, $addr);
34             $instr = CPU::Z80::Disassembler::Instruction->defm7($memory, $addr);
35             $instr = CPU::Z80::Disassembler::Instruction->org($memory, $addr);
36            
37             $instr->addr; $instr->next_addr;
38             $instr->bytes; $instr->opcode; $instr->N; $instr->NN; $instr->DIS; $instr->STR;
39             $instr->comment;
40             print $instr->dump;
41             print $instr->asm;
42             print $instr->as_string, "\n";
43              
44             =head1 DESCRIPTION
45              
46             This module represents one disassembled instruction. The object is
47             constructed by one of the factory methods, and has attributes to ease the
48             interpretation of the instruction.
49              
50             =head1 CONSTRUCTORS
51              
52             =head2 disassemble
53              
54             Factory method to create a new object by disassembling the given
55             L object
56             at the given address.
57              
58             The C<$limit_addr> argument, if defined, tells the disassembler to select
59             the longest possible instruction, that does not use the byte at C<$limit_add>.
60             The default is to select the shortest possible instruction.
61              
62             For example, the sequence of bytes C<62 6B> is decoded as C if
63             C<$limit_addr> is undef.
64              
65             If C<$limit_addr> is defined with any value different from C<$addr + 1>, where
66             the second byte is stored, then the same sequence of bytes is decoded as
67             C.
68              
69             To decode standard Z80 instructions, do not pass the C<$limit_addr> argument.
70              
71             To decode extended Z80 instructions, pass the address of the next label after
72             C<$addr>, or 0x10000 to get always the longest instruction.
73              
74             If the instruction at the given address is an invalid opcode, or if there
75             are no loaded bytes at the given address, the instrution object is not
76             constructed and the factory returns C.
77              
78             =head2 defb
79              
80             Factory method to create a new object by disassembling a C instruction
81             at the given address, reading one or C<$count> byte(s) from memory.
82              
83             =head2 defb2
84              
85             Same as defb but shows binary data.
86              
87             =head2 defw
88              
89             Factory method to create a new object by disassembling a C instruction
90             at the given address, reading one or C<$count> word(s) from memory.
91              
92             =head2 defm
93              
94             Factory method to create a new object by disassembling a C instruction
95             at the given address, reading C<$length> character(s) from memory.
96              
97             =head2 defmz
98              
99             Factory method to create a new object by disassembling a C instruction
100             at the given address, reading character(s) from memory until a zero terminator
101             is found.
102              
103             =head2 defm7
104              
105             Factory method to create a new object by disassembling a C instruction
106             at the given address, reading character(s) from memory until a character
107             with bit 7 set is found.
108              
109             =head2 org
110              
111             Factory method to create a new ORG instruction.
112              
113             =cut
114              
115             #------------------------------------------------------------------------------
116              
117             =head1 ATTRIBUTES
118              
119             =head2 memory
120              
121             Point to the memory object from where this instruction was disassembled.
122              
123             =head2 addr
124              
125             Address of the instruction.
126              
127             =head2 size
128              
129             Size of the instruction, in bytes.
130              
131             =head2 next_addr
132              
133             Returns the address that follows this instruction.
134              
135             =head2 next_code
136              
137             Returns the list with the next possible addresses where the code flow can continue.
138              
139             For an instruction that does not branch, this is the same as C.
140              
141             For a decision-branch instruction, these are the C and the C.
142              
143             For an instruction that breaks the flow (e.g. C), this is an empty list.
144              
145             A C or C instruction is considered as breaking the flow, because
146             the called routine might manipulate the return pointer in the stack, and the
147             bytes after the C or C instruction can be data bytes.
148              
149             =head2 bytes
150              
151             Reference to a list of the instruction bytes. The bytes are retrieved
152             from the L
153             object.
154              
155             =head2 opcode
156              
157             Canonical assembly instruction, e.g. 'ld a,(NN)'.
158             The possible argument types are N, NN, DIS and STR.
159             There is one method to get/set each of the argument types.
160              
161             =head2 N
162              
163             8-bit data used by the instruction.
164              
165             =head2 N2
166              
167             8-bit data used by the instruction, to be shown in base 2.
168              
169             =head2 NN
170              
171             16-bit data used by the instruction.
172              
173             =head2 DIS
174              
175             Offset for index register.
176              
177             =head2 STR
178              
179             String for defm* instructions.
180              
181             =head2 comment
182              
183             Comment to be written after a '; ' at the end of the line.
184              
185             =head2 format
186              
187             Returs the hash of special formating functions for each type of argument. These
188             functions, if defined, are called instead of the ones in the
189             L module to format
190             each type of argument.
191              
192             For example, to format the 8-bit argument of an instruction as decimal:
193              
194             $instr->format->{N} = sub { my $v = shift; return "$v" };
195              
196             =cut
197              
198             #------------------------------------------------------------------------------
199              
200             =head1 PREDICATES
201              
202             =head2 is_code
203              
204             Return TRUE if the instruction is a Z80 assembly opcode, FALSE if it is one
205             of the data definition or org instructions.
206              
207             =head2 is_call
208              
209             Return TRUE if the instruction is a call instruction, i.e. C or C.
210              
211             =head2 is_branch
212              
213             Return TRUE if the instruction may branch to another address, the address is
214             stored in the C attribute. This is either a jump or a call instruction.
215              
216             =head2 is_break_flow
217              
218             Return TRUE if the instruction breaks the flow at this point and jumps to some
219             other part of the code. A call instruction is considered as breaking the flow,
220             see C above.
221              
222             =cut
223              
224             #------------------------------------------------------------------------------
225 7     7   81 use base 'Class::Accessor';
  7         14  
  7         9577  
226             __PACKAGE__->mk_accessors(
227             'memory', # point to whole memory
228             'addr', # start address
229             'size', # number of bytes of instruction
230             'opcode', # canonical opcode, e.g. 'ld a,(NN)'
231             'N', # 8-bit data
232             'N2', # 8-bit data in binary
233             'NN', # 16-bit data
234             'DIS', # offset for index
235             'STR', # unquoted string for defm*
236             'comment', # comment after instruction
237             '_format', # hash of (N, NN, DIS, STR) => custom function to
238             # format each type of argument
239             'is_code', # true for a Z80 assembly instruction,
240             # false for def*, org, ...
241             );
242              
243             #------------------------------------------------------------------------------
244             sub format {
245 19952     19952 1 163606 my($self) = @_;
246 19952 100       40511 $self->_format({}) unless $self->_format;
247 19952         245607 $self->_format;
248             }
249              
250             #------------------------------------------------------------------------------
251             my %default_format = (
252             N => \&format_hex2,
253             N2 => \&format_bin8,
254             NN => \&format_hex4,
255             DIS => \&format_dis,
256             STR => \&format_str,
257             );
258              
259             #------------------------------------------------------------------------------
260             sub next_addr {
261 243403     243403 1 2723347 my($self) = @_;
262 243403         423078 $self->addr + $self->size;
263             }
264              
265             #------------------------------------------------------------------------------
266             sub next_code {
267 12238     12238 1 29209 my($self) = @_;
268 12238         16645 my @ret;
269 12238 100       20323 push @ret, $self->NN if $self->is_branch;
270 12238 100       159166 push @ret, $self->next_addr unless $self->is_break_flow;
271 12238         194492 @ret;
272             }
273              
274             #------------------------------------------------------------------------------
275             sub bytes {
276 45831     45831 1 66953 my($self) = @_;
277 45831         54436 my @bytes;
278 45831         68414 for my $addr ($self->addr .. $self->next_addr - 1) {
279 72282         905875 push @bytes, $self->memory->peek($addr);
280             }
281 45831         464464 \@bytes;
282             }
283              
284             #------------------------------------------------------------------------------
285             # predicates
286 132492     132492 1 256045 sub is_call { shift->opcode =~ /call|rst/ }
287 127216     127216 1 1214838 sub is_branch { shift->opcode =~ /jp .*NN|jr|djnz|call|rst/ }
288 148003     148003 1 288906 sub is_break_flow { shift->opcode =~ /ret$|reti|retn|call NN|rst|jr NN|jp NN|jp \(\w+\)|org/ }
289              
290             #------------------------------------------------------------------------------
291             sub disassemble {
292 64250     64250 1 877194 my($class, $memory, $addr, $limit_addr) = @_;
293              
294 64250         176594 my $self = bless { memory => $memory,
295             addr => $addr,
296             is_code => 1,
297             }, $class;
298              
299             # save bytes of all decoded instructions
300 64250         89760 my @found; # other instructions found
301            
302 64250         137813 my $table = Asm::Z80::Table->disasm_table;
303 64250   100     229709 for ( ;
304             # exit if second instruction goes over limit, e.g. label
305             ! (defined($limit_addr) && @found && $addr == $limit_addr) ;
306             $addr++
307             ) {
308             # fetch
309 100040         206855 my $byte = $memory->peek($addr);
310 100040 100       977066 last unless defined $byte; # unloaded memory
311            
312             # lookup in table
313 100038 100       337304 if (exists $table->{N}) {
    100          
    100          
    100          
    100          
    100          
314 5871 50       12475 die if defined $self->N;
315 5871         51997 $self->N( $byte );
316 5871         52773 $table = $table->{N};
317             }
318             elsif (exists $table->{NNl}) {
319 10386 50       20435 die if defined $self->NN;
320 10386         96993 $self->NN( $memory->peek16u($addr++) );
321 10386         97215 $table = $table->{NNl}{NNh};
322             }
323             elsif (exists $table->{NNo}) {
324 6622 50       13374 die if defined $self->NN;
325 6622         60125 $self->NN( $addr + 1 + $memory->peek8s($addr) );
326 6622         58600 $table = $table->{NNo};
327             }
328             elsif (exists $table->{DIS}) {
329 1709 50       4012 die if defined $self->DIS;
330 1709         15698 $self->DIS( $memory->peek8s($addr) );
331 1709         15925 $table = $table->{DIS};
332             }
333             elsif (exists $table->{'DIS+1'}) {
334 21 50       50 die unless defined $self->DIS;
335 21 50       190 if ( $self->DIS + 1 != $memory->peek8s($addr) ) {
336 0         0 last; # abort search
337             }
338 21         64 $table = $table->{'DIS+1'};
339             }
340             elsif (! exists $table->{$byte}) {
341 5635         7533 last; # abort search
342             }
343             else {
344 69794         105230 $table = $table->{$byte};
345             }
346            
347             # check for end
348 94403 100       204059 if (exists $table->{''}) { # possible finish
349 64444         82716 push @found, [ [@{$table->{''}}], $addr + 1 ];
  64444         191198  
350             # save this instance, copy
351 64444 100       153622 last unless defined $limit_addr; # no limit -> shortest instr
352            
353             # continue for composite instruction
354             }
355             }
356            
357             # return undef if no instrution found
358 64250 100       114963 return undef unless @found;
359            
360             # collect last complete instruction found
361 64236         80913 my($opcode, @args) = @{$found[-1][0]};
  64236         144768  
362 64236 100       168818 $opcode .= ' '.join('', @args) if @args;
363 64236         193834 $opcode =~ s/,\s*/, /g;
364            
365 64236         167847 $self->opcode($opcode);
366 64236         619430 $self->size($found[-1][1] - $self->addr);
367            
368             # special case: rst -> show address in hex
369 64236 100       981724 if ($opcode =~ /rst (\d+)/) {
370 5590         14263 $self->N($1); # set N for display
371 5590         53442 $self->NN($1); # set NN for analysis
372 5590         46406 $self->opcode('rst N');
373             }
374            
375 64236         274598 $self;
376             }
377              
378             #------------------------------------------------------------------------------
379             sub _def_value {
380 7596     7596   15413 my($class, $peek, $size, $def, $N,
381             $memory, $addr, $count) = @_;
382            
383 7596   100     17389 $count ||= 1;
384 7596         12675 my $values = [];
385 7596         15776 for my $i (0 .. $count - 1) {
386 45928         95817 my $value = $memory->$peek($addr + $size * $i); # read values
387 45928 100       404293 return undef unless defined $value; # unloaded memory
388            
389 45923         76361 $values->[$i] = $value;
390             }
391            
392 7591         39470 return bless { memory => $memory,
393             addr => $addr,
394             size => $size * $count,
395             opcode => "$def $N",
396             $N => $values,
397             }, $class;
398             }
399              
400             #------------------------------------------------------------------------------
401 7274     7274 1 73674 sub defb { shift->_def_value('peek8u', 1, 'defb', 'N', @_) }
402 0     0 1 0 sub defb2 { shift->_def_value('peek8u', 1, 'defb', 'N2', @_) }
403 322     322 1 3545 sub defw { shift->_def_value('peek16u', 2, 'defw', 'NN', @_) }
404              
405             #------------------------------------------------------------------------------
406             sub _def_str {
407 216     216   475 my($class, $peek, $eos_length, $def,
408             $memory, $addr, $length) = @_;
409            
410 216         608 my $str = $memory->$peek($addr, $length);
411 216 100       541 return undef unless defined $str; # unloaded memory
412            
413 210         1395 return $class->new({memory => $memory,
414             addr => $addr,
415             size => length($str) + $eos_length,
416             opcode => "$def STR",
417             STR => $str});
418             }
419              
420             #------------------------------------------------------------------------------
421 82     82 1 914 sub defm { shift->_def_str('peek_str', 0, 'defm', @_) }
422 3     3 1 15 sub defmz { shift->_def_str('peek_strz', 1, 'defmz', @_) }
423 131     131 1 1331 sub defm7 { shift->_def_str('peek_str7', 0, 'defm7', @_) }
424              
425             #------------------------------------------------------------------------------
426             sub org {
427 8     8 1 138 my($class, $memory, $addr) = @_;
428              
429 8         74 return bless { memory => $memory,
430             addr => $addr,
431             size => 0,
432             opcode => "org NN",
433             NN => $addr,
434             }, $class;
435             }
436              
437             #------------------------------------------------------------------------------
438              
439             =head1 FUNCTIONS
440              
441             =head2 as_string
442              
443             Returns the disassembled instruction opcode and arguments.
444              
445             =cut
446              
447             #------------------------------------------------------------------------------
448             # Format of the disassembled output
449             # 1 2 3 4 5 6 7
450             # 0123456789012345678901234567890123456789012345678901234567890123456789012
451             # # # # # # # # # # #
452             # AAAA H1H2H3H4H5 INSTR ; COMMENT
453             #
454             sub as_string {
455 63426     63426 1 123151 my($self) = @_;
456              
457             # decode opcode
458 63426         118063 my $opcode = $self->opcode;
459 63426         673954 $opcode =~ s{\b ( N | N2 | NN | \+(DIS) | STR ) \b
460             }{
461 33014   66     130597 $self->_format_arg($2 || $1)
462             }gex;
463            
464 63426         173900 my $comment = $self->comment;
465            
466 63426 100       524575 if (defined $comment) {
467 12130         24069 $comment =~ s/\n/ "\n" . " " x 32 . "; " /ge; # multi-line comment
  3251         8449  
468             }
469            
470 63426 100       187732 return !defined($comment) ?
    100          
471             $opcode :
472             length($opcode) >= 24 ?
473             $opcode . "\n" . " " x 32 . "; " . $comment :
474             sprintf("%-24s; %s", $opcode, $comment);
475             }
476              
477             sub _format_arg {
478 33014     33014   75646 my($self, $arg) = @_;
479              
480             my $ffunc = ( $self->_format && $self->format->{$arg} ?
481             $self->format->{$arg} :
482 33014 100 100     69189 $default_format{$arg}
483             );
484 33014         321131 my $value = $self->$arg;
485 33014 100       283801 $value = [$value] unless ref($value);
486            
487 33014         58955 return join(", ", map {$ffunc->($_)} @$value)
  71366         123799  
488             }
489              
490             #------------------------------------------------------------------------------
491              
492             =head2 dump
493              
494             Returns the disassembly dump ready to print, containing address, bytes and
495             instruction, followed by newline.
496              
497             =cut
498              
499             #------------------------------------------------------------------------------
500 7     7   62 use constant BPL => 5;
  7         20  
  7         2003  
501              
502             sub dump {
503 45792     45792 1 167724 my($self) = @_;
504              
505             # address
506 45792         74400 my $ret = sprintf("%04X ", $self->addr);
507            
508             # bytes
509 45792         412021 my $bytes = '';
510 45792         54818 for (@{$self->bytes}) {
  45792         77491  
511 72104         142322 $bytes .= sprintf("%02X", $_);
512             }
513            
514             # first line of bytes
515 45792         156096 $ret .= sprintf("%-*s ", BPL*2, substr($bytes, 0, BPL*2));
516 45792 100       103429 $bytes = length($bytes) < BPL*2 ? '' : substr($bytes, BPL*2);
517            
518             # opcode
519 45792         83767 $ret .= $self->as_string . "\n";
520            
521             # next lines of bytes
522 45792         103013 while ($bytes ne '') {
523 20         87 $ret .= " " x 5 . sprintf("%-*s \n", BPL*2, substr($bytes, 0, BPL*2));
524 20 100       78 $bytes = length($bytes) < BPL*2 ? '' : substr($bytes, BPL*2);
525             }
526            
527 45792         253670 $ret;
528             }
529              
530             #------------------------------------------------------------------------------
531              
532             =head2 asm
533              
534             Returns the disassembly asm line ready to print, containing
535             instruction and comments, followed by newline.
536              
537             =cut
538              
539             #------------------------------------------------------------------------------
540             sub asm {
541 17560     17560 1 28445 my($self) = @_;
542            
543 17560 100 100     31753 sprintf("%-7s %s\n%s", '',
544             $self->as_string,
545             ($self->is_break_flow && ! $self->is_call) ? "\n" : "");
546             }
547              
548             #------------------------------------------------------------------------------
549              
550             =head1 AUTHOR, BUGS, FEEDBACK, LICENSE AND COPYRIGHT
551              
552             See L.
553              
554             =cut
555              
556             #------------------------------------------------------------------------------
557              
558             1;