File Coverage

blib/lib/CPU/Z80/Disassembler/Instruction.pm
Criterion Covered Total %
statement 129 131 98.4
branch 56 62 90.3
condition 13 14 92.8
subroutine 27 28 96.4
pod 18 18 100.0
total 243 253 96.0


line stmt bran cond sub pod time code
1             package CPU::Z80::Disassembler::Instruction;
2              
3             #------------------------------------------------------------------------------
4              
5             =head1 NAME
6              
7             CPU::Z80::Disassembler::Instruction - One Z80 disassembled instruction
8              
9             =cut
10              
11             #------------------------------------------------------------------------------
12              
13 7     7   1034 use strict;
  7         16  
  7         209  
14 7     7   38 use warnings;
  7         11  
  7         205  
15              
16 7     7   24642 use Asm::Z80::Table;
  7         361722  
  7         421  
17 7     7   889 use CPU::Z80::Disassembler::Memory;
  7         16  
  7         100  
18 7     7   254 use CPU::Z80::Disassembler::Format;
  7         17  
  7         980  
19              
20             our $VERSION = '1.01';
21              
22             #------------------------------------------------------------------------------
23              
24             =head1 SYNOPSIS
25              
26             use CPU::Z80::Disassembler::Instruction;
27             $instr = CPU::Z80::Disassembler::Instruction->disassemble(
28             $memory, $addr, $limit_addr);
29             $instr = CPU::Z80::Disassembler::Instruction->defb($memory, $addr, $count);
30             $instr = CPU::Z80::Disassembler::Instruction->defb2($memory, $addr, $count);
31             $instr = CPU::Z80::Disassembler::Instruction->defw($memory, $addr, $count);
32             $instr = CPU::Z80::Disassembler::Instruction->defm($memory, $addr, $length);
33             $instr = CPU::Z80::Disassembler::Instruction->defmz($memory, $addr);
34             $instr = CPU::Z80::Disassembler::Instruction->defm7($memory, $addr);
35             $instr = CPU::Z80::Disassembler::Instruction->org($memory, $addr);
36            
37             $instr->addr; $instr->next_addr;
38             $instr->bytes; $instr->opcode; $instr->N; $instr->NN; $instr->DIS; $instr->STR;
39             $instr->comment;
40             print $instr->dump;
41             print $instr->asm;
42             print $instr->as_string, "\n";
43              
44             =head1 DESCRIPTION
45              
46             This module represents one disassembled instruction. The object is
47             constructed by one of the factory methods, and has attributes to ease the
48             interpretation of the instruction.
49              
50             =head1 CONSTRUCTORS
51              
52             =head2 disassemble
53              
54             Factory method to create a new object by disassembling the given
55             L object
56             at the given address.
57              
58             The C<$limit_addr> argument, if defined, tells the disassembler to select
59             the longest possible instruction, that does not use the byte at C<$limit_add>.
60             The default is to select the shortest possible instruction.
61              
62             For example, the sequence of bytes C<62 6B> is decoded as C if
63             C<$limit_addr> is undef.
64              
65             If C<$limit_addr> is defined with any value different from C<$addr + 1>, where
66             the second byte is stored, then the same sequence of bytes is decoded as
67             C.
68              
69             To decode standard Z80 instructions, do not pass the C<$limit_addr> argument.
70              
71             To decode extended Z80 instructions, pass the address of the next label after
72             C<$addr>, or 0x10000 to get always the longest instruction.
73              
74             If the instruction at the given address is an invalid opcode, or if there
75             are no loaded bytes at the given address, the instrution object is not
76             constructed and the factory returns C.
77              
78             =head2 defb
79              
80             Factory method to create a new object by disassembling a C instruction
81             at the given address, reading one or C<$count> byte(s) from memory.
82              
83             =head2 defb2
84              
85             Same as defb but shows binary data.
86              
87             =head2 defw
88              
89             Factory method to create a new object by disassembling a C instruction
90             at the given address, reading one or C<$count> word(s) from memory.
91              
92             =head2 defm
93              
94             Factory method to create a new object by disassembling a C instruction
95             at the given address, reading C<$length> character(s) from memory.
96              
97             =head2 defmz
98              
99             Factory method to create a new object by disassembling a C instruction
100             at the given address, reading character(s) from memory until a zero terminator
101             is found.
102              
103             =head2 defm7
104              
105             Factory method to create a new object by disassembling a C instruction
106             at the given address, reading character(s) from memory until a character
107             with bit 7 set is found.
108              
109             =head2 org
110              
111             Factory method to create a new ORG instruction.
112              
113             =cut
114              
115             #------------------------------------------------------------------------------
116              
117             =head1 ATTRIBUTES
118              
119             =head2 memory
120              
121             Point to the memory object from where this instruction was disassembled.
122              
123             =head2 addr
124              
125             Address of the instruction.
126              
127             =head2 size
128              
129             Size of the instruction, in bytes.
130              
131             =head2 next_addr
132              
133             Returns the address that follows this instruction.
134              
135             =head2 next_code
136              
137             Returns the list with the next possible addresses where the code flow can continue.
138              
139             For an instruction that does not branch, this is the same as C.
140              
141             For a decision-branch instruction, these are the C and the C.
142              
143             For an instruction that breaks the flow (e.g. C), this is an empty list.
144              
145             A C or C instruction is considered as breaking the flow, because
146             the called routine might manipulate the return pointer in the stack, and the
147             bytes after the C or C instruction can be data bytes.
148              
149             =head2 bytes
150              
151             Reference to a list of the instruction bytes. The bytes are retrieved
152             from the L
153             object.
154              
155             =head2 opcode
156              
157             Canonical assembly instruction, e.g. 'ld a,(NN)'.
158             The possible argument types are N, NN, DIS and STR.
159             There is one method to get/set each of the argument types.
160              
161             =head2 N
162              
163             8-bit data used by the instruction.
164              
165             =head2 N2
166              
167             8-bit data used by the instruction, to be shown in base 2.
168              
169             =head2 NN
170              
171             16-bit data used by the instruction.
172              
173             =head2 DIS
174              
175             Offset for index register.
176              
177             =head2 STR
178              
179             String for defm* instructions.
180              
181             =head2 comment
182              
183             Comment to be written after a '; ' at the end of the line.
184              
185             =head2 format
186              
187             Returs the hash of special formating functions for each type of argument. These
188             functions, if defined, are called instead of the ones in the
189             L module to format
190             each type of argument.
191              
192             For example, to format the 8-bit argument of an instruction as decimal:
193              
194             $instr->format->{N} = sub { my $v = shift; return "$v" };
195              
196             =cut
197              
198             #------------------------------------------------------------------------------
199              
200             =head1 PREDICATES
201              
202             =head2 is_code
203              
204             Return TRUE if the instruction is a Z80 assembly opcode, FALSE if it is one
205             of the data definition or org instructions.
206              
207             =head2 is_call
208              
209             Return TRUE if the instruction is a call instruction, i.e. C or C.
210              
211             =head2 is_branch
212              
213             Return TRUE if the instruction may branch to another address, the address is
214             stored in the C attribute. This is either a jump or a call instruction.
215              
216             =head2 is_break_flow
217              
218             Return TRUE if the instruction breaks the flow at this point and jumps to some
219             other part of the code. A call instruction is considered as breaking the flow,
220             see C above.
221              
222             =cut
223              
224             #------------------------------------------------------------------------------
225 7     7   140 use base 'Class::Accessor';
  7         19  
  7         12151  
226             __PACKAGE__->mk_accessors(
227             'memory', # point to whole memory
228             'addr', # start address
229             'size', # number of bytes of instruction
230             'opcode', # canonical opcode, e.g. 'ld a,(NN)'
231             'N', # 8-bit data
232             'N2', # 8-bit data in binary
233             'NN', # 16-bit data
234             'DIS', # offset for index
235             'STR', # unquoted string for defm*
236             'comment', # comment after instruction
237             '_format', # hash of (N, NN, DIS, STR) => custom function to
238             # format each type of argument
239             'is_code', # true for a Z80 assembly instruction,
240             # false for def*, org, ...
241             );
242              
243             #------------------------------------------------------------------------------
244             sub format {
245 19951     19951 1 177838 my($self) = @_;
246 19951 100       41280 $self->_format({}) unless $self->_format;
247 19951         263114 $self->_format;
248             }
249              
250             #------------------------------------------------------------------------------
251             my %default_format = (
252             N => \&format_hex2,
253             N2 => \&format_bin8,
254             NN => \&format_hex4,
255             DIS => \&format_dis,
256             STR => \&format_str,
257             );
258              
259             #------------------------------------------------------------------------------
260             sub next_addr {
261 243324     243324 1 2981636 my($self) = @_;
262 243324         510704 $self->addr + $self->size;
263             }
264              
265             #------------------------------------------------------------------------------
266             sub next_code {
267 12237     12237 1 33100 my($self) = @_;
268 12237         18863 my @ret;
269 12237 100       23841 push @ret, $self->NN if $self->is_branch;
270 12237 100       174661 push @ret, $self->next_addr unless $self->is_break_flow;
271 12237         213865 @ret;
272             }
273              
274             #------------------------------------------------------------------------------
275             sub bytes {
276 45831     45831 1 81946 my($self) = @_;
277 45831         69560 my @bytes;
278 45831         93889 for my $addr ($self->addr .. $self->next_addr - 1) {
279 72282         1113423 push @bytes, $self->memory->peek($addr);
280             }
281 45831         570875 \@bytes;
282             }
283              
284             #------------------------------------------------------------------------------
285             # predicates
286 132403     132403 1 298949 sub is_call { shift->opcode =~ /call|rst/ }
287 127161     127161 1 1319444 sub is_branch { shift->opcode =~ /jp .*NN|jr|djnz|call|rst/ }
288 147914     147914 1 333041 sub is_break_flow { shift->opcode =~ /ret$|reti|retn|call NN|rst|jr NN|jp NN|jp \(\w+\)|org/ }
289              
290             #------------------------------------------------------------------------------
291             sub disassemble {
292 64250     64250 1 1042942 my($class, $memory, $addr, $limit_addr) = @_;
293              
294 64250         217311 my $self = bless { memory => $memory,
295             addr => $addr,
296             is_code => 1,
297             }, $class;
298              
299             # save bytes of all decoded instructions
300 64250         108844 my @found; # other instructions found
301            
302 64250         157633 my $table = Asm::Z80::Table->disasm_table;
303 64250   100     268479 for ( ;
304             # exit if second instruction goes over limit, e.g. label
305             ! (defined($limit_addr) && @found && $addr == $limit_addr) ;
306             $addr++
307             ) {
308             # fetch
309 100040         240601 my $byte = $memory->peek($addr);
310 100040 100       1155042 last unless defined $byte; # unloaded memory
311            
312             # lookup in table
313 100038 100       405846 if (exists $table->{N}) {
    100          
    100          
    100          
    100          
    100          
314 5871 50       13734 die if defined $self->N;
315 5871         63183 $self->N( $byte );
316 5871         62322 $table = $table->{N};
317             }
318             elsif (exists $table->{NNl}) {
319 10386 50       25297 die if defined $self->NN;
320 10386         113655 $self->NN( $memory->peek16u($addr++) );
321 10386         116266 $table = $table->{NNl}{NNh};
322             }
323             elsif (exists $table->{NNo}) {
324 6622 50       16924 die if defined $self->NN;
325 6622         71832 $self->NN( $addr + 1 + $memory->peek8s($addr) );
326 6622         71416 $table = $table->{NNo};
327             }
328             elsif (exists $table->{DIS}) {
329 1709 50       4450 die if defined $self->DIS;
330 1709         18939 $self->DIS( $memory->peek8s($addr) );
331 1709         18844 $table = $table->{DIS};
332             }
333             elsif (exists $table->{'DIS+1'}) {
334 21 50       58 die unless defined $self->DIS;
335 21 50       292 if ( $self->DIS + 1 != $memory->peek8s($addr) ) {
336 0         0 last; # abort search
337             }
338 21         51 $table = $table->{'DIS+1'};
339             }
340             elsif (! exists $table->{$byte}) {
341 5635         9544 last; # abort search
342             }
343             else {
344 69794         136082 $table = $table->{$byte};
345             }
346            
347             # check for end
348 94403 100       249696 if (exists $table->{''}) { # possible finish
349 64444         105617 push @found, [ [@{$table->{''}}], $addr + 1 ];
  64444         250308  
350             # save this instance, copy
351 64444 100       181013 last unless defined $limit_addr; # no limit -> shortest instr
352            
353             # continue for composite instruction
354             }
355             }
356            
357             # return undef if no instrution found
358 64250 100       137856 return undef unless @found;
359            
360             # collect last complete instruction found
361 64236         99855 my($opcode, @args) = @{$found[-1][0]};
  64236         174309  
362 64236 100       199370 $opcode .= ' '.join('', @args) if @args;
363 64236         228642 $opcode =~ s/,\s*/, /g;
364            
365 64236         199651 $self->opcode($opcode);
366 64236         746539 $self->size($found[-1][1] - $self->addr);
367            
368             # special case: rst -> show address in hex
369 64236 100       1184570 if ($opcode =~ /rst (\d+)/) {
370 5590         17155 $self->N($1); # set N for display
371 5590         66180 $self->NN($1); # set NN for analysis
372 5590         56500 $self->opcode('rst N');
373             }
374            
375 64236         319335 $self;
376             }
377              
378             #------------------------------------------------------------------------------
379             sub _def_value {
380 7596     7596   18488 my($class, $peek, $size, $def, $N,
381             $memory, $addr, $count) = @_;
382            
383 7596   100     20757 $count ||= 1;
384 7596         14980 my $values = [];
385 7596         18948 for my $i (0 .. $count - 1) {
386 45928         117833 my $value = $memory->$peek($addr + $size * $i); # read values
387 45928 100       496110 return undef unless defined $value; # unloaded memory
388            
389 45923         94188 $values->[$i] = $value;
390             }
391            
392 7591         47765 return bless { memory => $memory,
393             addr => $addr,
394             size => $size * $count,
395             opcode => "$def $N",
396             $N => $values,
397             }, $class;
398             }
399              
400             #------------------------------------------------------------------------------
401 7274     7274 1 85244 sub defb { shift->_def_value('peek8u', 1, 'defb', 'N', @_) }
402 0     0 1 0 sub defb2 { shift->_def_value('peek8u', 1, 'defb', 'N2', @_) }
403 322     322 1 3993 sub defw { shift->_def_value('peek16u', 2, 'defw', 'NN', @_) }
404              
405             #------------------------------------------------------------------------------
406             sub _def_str {
407 216     216   494 my($class, $peek, $eos_length, $def,
408             $memory, $addr, $length) = @_;
409            
410 216         629 my $str = $memory->$peek($addr, $length);
411 216 100       555 return undef unless defined $str; # unloaded memory
412            
413 210         1612 return $class->new({memory => $memory,
414             addr => $addr,
415             size => length($str) + $eos_length,
416             opcode => "$def STR",
417             STR => $str});
418             }
419              
420             #------------------------------------------------------------------------------
421 82     82 1 905 sub defm { shift->_def_str('peek_str', 0, 'defm', @_) }
422 3     3 1 13 sub defmz { shift->_def_str('peek_strz', 1, 'defmz', @_) }
423 131     131 1 1398 sub defm7 { shift->_def_str('peek_str7', 0, 'defm7', @_) }
424              
425             #------------------------------------------------------------------------------
426             sub org {
427 8     8 1 194 my($class, $memory, $addr) = @_;
428              
429 8         100 return bless { memory => $memory,
430             addr => $addr,
431             size => 0,
432             opcode => "org NN",
433             NN => $addr,
434             }, $class;
435             }
436              
437             #------------------------------------------------------------------------------
438              
439             =head1 FUNCTIONS
440              
441             =head2 as_string
442              
443             Returns the disassembled instruction opcode and arguments.
444              
445             =cut
446              
447             #------------------------------------------------------------------------------
448             # Format of the disassembled output
449             # 1 2 3 4 5 6 7
450             # 0123456789012345678901234567890123456789012345678901234567890123456789012
451             # # # # # # # # # # #
452             # AAAA H1H2H3H4H5 INSTR ; COMMENT
453             #
454             sub as_string {
455 63426     63426 1 145891 my($self) = @_;
456              
457             # decode opcode
458 63426         138726 my $opcode = $self->opcode;
459 63426         812084 $opcode =~ s{\b ( N | N2 | NN | \+(DIS) | STR ) \b
460             }{
461 33014   66     152743 $self->_format_arg($2 || $1)
462             }gex;
463            
464 63426         197348 my $comment = $self->comment;
465            
466 63426 100       628681 if (defined $comment) {
467 12130         26098 $comment =~ s/\n/ "\n" . " " x 32 . "; " /ge; # multi-line comment
  3251         8731  
468             }
469            
470 63426 100       222602 return !defined($comment) ?
    100          
471             $opcode :
472             length($opcode) >= 24 ?
473             $opcode . "\n" . " " x 32 . "; " . $comment :
474             sprintf("%-24s; %s", $opcode, $comment);
475             }
476              
477             sub _format_arg {
478 33014     33014   91480 my($self, $arg) = @_;
479              
480             my $ffunc = ( $self->_format && $self->format->{$arg} ?
481             $self->format->{$arg} :
482 33014 100 100     79840 $default_format{$arg}
483             );
484 33014         378442 my $value = $self->$arg;
485 33014 100       339333 $value = [$value] unless ref($value);
486            
487 33014         70931 return join(", ", map {$ffunc->($_)} @$value)
  71366         155067  
488             }
489              
490             #------------------------------------------------------------------------------
491              
492             =head2 dump
493              
494             Returns the disassembly dump ready to print, containing address, bytes and
495             instruction, followed by newline.
496              
497             =cut
498              
499             #------------------------------------------------------------------------------
500 7     7   65 use constant BPL => 5;
  7         17  
  7         2555  
501              
502             sub dump {
503 45792     45792 1 208569 my($self) = @_;
504              
505             # address
506 45792         91296 my $ret = sprintf("%04X ", $self->addr);
507            
508             # bytes
509 45792         505553 my $bytes = '';
510 45792         66233 for (@{$self->bytes}) {
  45792         98702  
511 72104         181919 $bytes .= sprintf("%02X", $_);
512             }
513            
514             # first line of bytes
515 45792         187802 $ret .= sprintf("%-*s ", BPL*2, substr($bytes, 0, BPL*2));
516 45792 100       123219 $bytes = length($bytes) < BPL*2 ? '' : substr($bytes, BPL*2);
517            
518             # opcode
519 45792         103122 $ret .= $self->as_string . "\n";
520            
521             # next lines of bytes
522 45792         119507 while ($bytes ne '') {
523 20         112 $ret .= " " x 5 . sprintf("%-*s \n", BPL*2, substr($bytes, 0, BPL*2));
524 20 100       76 $bytes = length($bytes) < BPL*2 ? '' : substr($bytes, BPL*2);
525             }
526            
527 45792         305904 $ret;
528             }
529              
530             #------------------------------------------------------------------------------
531              
532             =head2 asm
533              
534             Returns the disassembly asm line ready to print, containing
535             instruction and comments, followed by newline.
536              
537             =cut
538              
539             #------------------------------------------------------------------------------
540             sub asm {
541 17560     17560 1 32348 my($self) = @_;
542            
543 17560 100 100     36308 sprintf("%-7s %s\n%s", '',
544             $self->as_string,
545             ($self->is_break_flow && ! $self->is_call) ? "\n" : "");
546             }
547              
548             #------------------------------------------------------------------------------
549              
550             =head1 AUTHOR, BUGS, FEEDBACK, LICENSE AND COPYRIGHT
551              
552             See L.
553              
554             =cut
555              
556             #------------------------------------------------------------------------------
557              
558             1;