File Coverage

blib/lib/CPU/Z80/Assembler.pm
Criterion Covered Total %
statement 62 62 100.0
branch 6 8 75.0
condition n/a
subroutine 17 17 100.0
pod 4 4 100.0
total 89 91 97.8


line stmt bran cond sub pod time code
1             # $Id$
2              
3             package CPU::Z80::Assembler;
4              
5             #------------------------------------------------------------------------------
6              
7             =head1 NAME
8              
9             CPU::Z80::Assembler - a Z80 assembler
10              
11             =cut
12              
13             #------------------------------------------------------------------------------
14              
15 31     31   131937 use strict;
  31         124  
  31         928  
16 31     31   336 use warnings;
  31         68  
  31         865  
17              
18 31     31   14621 use Asm::Preproc;
  31         327861  
  31         178  
19 31     31   16450 use Asm::Preproc::Lexer;
  31         392885  
  31         195  
20 31     31   16561 use CPU::Z80::Assembler::Program;
  31         123  
  31         1282  
21 31     31   14673 use CPU::Z80::Assembler::List;
  31         102  
  31         931  
22              
23 31     31   220 use Text::Tabs; # imports expand(), unexpand()
  31         69  
  31         2901  
24 31     31   14005 use Regexp::Trie;
  31         15931  
  31         1110  
25              
26 31     31   242 use vars qw(@EXPORT $verbose);
  31         70  
  31         2257  
27              
28             our $VERSION = '2.24';
29             our $verbose;
30             our $fill_byte = 0xFF;
31              
32 31     31   217 use base qw(Exporter);
  31         69  
  31         55810  
33              
34             @EXPORT = qw(z80asm z80asm_file z80preprocessor z80lexer);
35              
36             #------------------------------------------------------------------------------
37              
38             =head1 SYNOPSIS
39              
40             use CPU::Z80::Assembler;
41              
42             $CPU::Z80::Assembler::verbose = 1;
43             $CPU::Z80::Assembler::fill_byte = 0xFF;
44             $binary = z80asm(q{
45             ORG 0x1000
46             LD A, 1
47             ...
48             });
49             $binary = z80asm_file($asm_file);
50             $binary = z80asm(@asm_lines);
51             $binary = z80asm('#include <file.asm>');
52             open($fh, $file); $binary = z80asm(sub {<$fh>});
53              
54             $lines = z80preprocessor(@asm_lines); $line = $lines->next;
55             $tokens = z80lexer(@asm_lines); $token = $tokens->next;
56              
57             =head1 DESCRIPTION
58              
59             This module provides functions to assemble a set of Z80 assembly instructions
60             given as a list or as an iterator, or a Z80 assembly source file.
61              
62             =head1 EXPORTS
63              
64             All functions are exported by default.
65              
66             =head1 FUNCTIONS
67              
68             =head2 z80asm
69              
70             This function takes as parameter a list of either text lines to parse,
71             or iterators that return text lines to parse.
72              
73             The list is passed to C<z80lexer>, that in turn calls C<z80preprocessor> to
74             handle file includes, and then splits the input into tokens.
75              
76             The stream of tokens is passed on to L<CPU::Z80::Assembler::Parser> that parses the
77             input and generates the object image in L<CPU::Z80::Assembler::Program>.
78             Assembly macro expansion is handled at this stage by L<CPU::Z80::Assembler::Macro>.
79              
80             The assembly program is composed of a list of L<CPU::Z80::Assembler::Segment> objects, each
81             representing one named section of code. Each segment is composed of a list of L<CPU::Z80::Assembler::Opcode> objects, each representing one assembly instruction.
82              
83             The output object code is returned as a string.
84              
85             If the $CPU::Z80::Assembler::verbose variable is set, an output listing is generated
86             by L<CPU::Z80::Assembler::List> on standard output.
87              
88             Assembly is done in five steps:
89              
90             =over 4
91              
92             =item 1
93              
94             input is preprocessed, scanned and split into tokens
95              
96             =item 2
97              
98             tokens are parsed and converted to lists of opcodes
99              
100             =item 3
101              
102             addresses for each opcode are allocated
103              
104             =item 4
105              
106             relative jumps are checked for out-of-range jumps and replaced by absolute
107             jumps if needed
108              
109             =item 5
110              
111             object code is generated for each opcode, computing all expressions used; the expressions are
112             represented by L<CPU::Z80::Assembler::Expr>.
113              
114             =back
115              
116             =cut
117              
118             #------------------------------------------------------------------------------
119             sub z80asm {
120 2738     2738 1 5607690 my(@input) = @_;
121 2738 50       8326 my $list_output = ($CPU::Z80::Assembler::verbose) ?
122             CPU::Z80::Assembler::List->new(
123             input => \@input,
124             output => \*STDOUT) :
125             undef;
126 2738         10537 my $program = CPU::Z80::Assembler::Program->new();
127 2738         8750 my $token_stream = z80lexer(@input);
128 2738         9171 $program->parse($token_stream);
129 2725         8221 my $bytes = $program->bytes($list_output);
130 2724 50       6119 $list_output->flush() if $list_output;
131 2724         51142 return $bytes;
132             }
133             #------------------------------------------------------------------------------
134              
135             =head2 z80asm_file
136              
137             This function takes as argument a Z80 assembly source file name and returns
138             the binary object code string.
139              
140             =cut
141              
142             #------------------------------------------------------------------------------
143             sub z80asm_file {
144 1     1 1 60 my($file) = @_;
145 1         9 return z80asm("#include <$file>");
146             }
147             #------------------------------------------------------------------------------
148              
149             =head2 z80preprocessor
150              
151             This function takes as parameter a list of either text lines to parse,
152             or iterators that return text lines to parse.
153              
154             The list is passed to the L<Asm::Preproc> module, which takes care
155             of file includes and handles the %line and #line lines generated by external
156             preprocessors like cpp or nasm.
157              
158             The result is a
159             L<Iterator::Simple::Lookahead> stream
160             of L<Asm::Preproc::Line> objects that contain each of
161             the input lines of the input.
162              
163             =cut
164              
165             #------------------------------------------------------------------------------
166              
167             sub z80preprocessor {
168 4587     4587 1 17813 my(@input) = @_;
169 4587         14445 my $pp = Asm::Preproc->new;
170 4587         52974 $pp->include_list(@input);
171            
172             # create a new stream to handle "INCLUDE" statement
173             return Iterator::Simple::Lookahead->new(
174             sub {
175 26518     26518   4954823 while (1) {
176 26525 100       78132 my $line = $pp->getline
177             or return undef; # end of input
178            
179             # handle "INCLUDE"
180 21983 100       2360405 if ($line->text =~ /^\s*(include\s+.*)/i) {
181 7         128 $pp->include_list("%$1"); # handle %include...
182 7         213 next; # get next line
183             }
184             else {
185 21976         259122 return $line;
186             }
187             }
188             }
189 4587         158136 );
190             }
191              
192             #------------------------------------------------------------------------------
193              
194             =head2 z80lexer
195              
196             This function takes as parameter a list of either text lines to parse,
197             or iterators that return text lines to parse.
198              
199             It calls C<z80preprocessor> to split the input into a
200             L<Iterator::Simple::Lookahead> stream
201             of L<Asm::Preproc::Line> objects representing the source
202             lines of the Z80 assembly language program.
203              
204             It returns a stream of L<Asm::Preproc::Token> objects for
205             each assembly token in the input.
206              
207             Each token contains a type string, a value and a
208             L<Asm::Preproc::Line> object pointing at the input line
209             where the token was found.
210              
211             =cut
212              
213             #------------------------------------------------------------------------------
214             # Keywords and composed symbols
215             my %KEYWORDS;
216             for (split(" ", "
217             a adc add af af' and b bc bit c call ccf cp cpd cpdr cpi cpir
218             cpl d daa de dec di djnz e ei equ ex exx h halt hl im
219             in inc ind indr ini inir ix iy jp jr l ld ldd lddr ldi ldir m
220             nc neg nop nz or otdr otir out outd outi p pe po pop push
221             res ret reti retn rl rla rlc rlca rld rr rra rrc rrca rrd rst
222             sbc scf set sla sll sli sp sra srl sub xor z
223             ixh ixl iyh iyl hx lx hy ly xh xl yh yl i r f
224             org stop defb defw deft defm defmz defm7 macro endm
225             ")) {
226             $KEYWORDS{$_}++;
227             }
228             my $SYMBOLS_RE = _regexp("
229             << >> == != >= <=
230             ");
231              
232             #------------------------------------------------------------------------------
233             # lexer
234             my $expand_escapes = sub {
235             local $_ = shift;
236             my $out = '';
237            
238             # remove quotes
239             s/^(['"])(.*)\1$/$2/ or die;
240            
241             while (! /\G \z/gcx) {
242             if (/\G \\ ([0-7]{1,3}) /gcx) { $out .= chr(oct($1)); }
243             elsif (/\G \\ x ([0-9a-f]{1,2}) /gcxi) { $out .= chr(hex($1)); }
244             elsif (/\G \\ a /gcx) { $out .= "\a"; }
245             elsif (/\G \\ b /gcx) { $out .= "\b"; }
246             elsif (/\G \\ e /gcx) { $out .= "\e"; }
247             elsif (/\G \\ f /gcx) { $out .= "\f"; }
248             elsif (/\G \\ n /gcx) { $out .= "\n"; }
249             elsif (/\G \\ r /gcx) { $out .= "\r"; }
250             elsif (/\G \\ t /gcx) { $out .= "\t"; }
251             elsif (/\G \\ v /gcx) { $out .= "\x0B"; }
252             elsif (/\G \\ ' /gcx) { $out .= "'"; }
253             elsif (/\G \\ " /gcx) { $out .= '"'; }
254             elsif (/\G \\ \\ /gcx) { $out .= "\\"; }
255             elsif (/\G \\ (.) /gcx) { $out .= $1; }
256             elsif (/\G \\ /gcx) { $out .= "\\"; }
257             elsif (/\G (.) /gcx) { $out .= $1; }
258             else { die; }
259             }
260            
261             return $out;
262             };
263            
264             my $lexer = Asm::Preproc::Lexer->new;
265             $lexer->make_lexer(
266            
267             # ignore comments and blanks except newline
268             COMMENT => qr/ ; .* /ix, undef,
269             BLANKS => qr/ [\t\f\r ]+ /ix, undef,
270              
271             # newline
272             NEWLINE => qr/ \n /ix, sub {["\n", "\n"]},
273              
274             # string - return without quotes
275             # Sequence (?|...) not recognized in regex in Perl 5.8
276             STRING => qr/ (?: \" (?: \\. | [^\\\"] )* \"
277             | \' (?: \\. | [^\\\'] )* \' ) /ix,
278             sub {[$_[0], $expand_escapes->($_[1])]},
279             # numbers
280             NUMBER => qr/ ( \d [0-9a-f]+ ) h \b /ix,
281             sub {[$_[0], oct("0x".$1)]},
282              
283             NUMBER => qr/ [\$\#] ( [0-9a-f]+ ) \b /ix,
284             sub {[$_[0], oct("0x".$1)]},
285            
286             NUMBER => qr/ ( [01]+ ) b \b /ix, sub {[$_[0], oct("0b".$1)]},
287            
288             NUMBER => qr/ % ( [01]+ ) \b /ix, sub {[$_[0], oct("0b".$1)]},
289            
290             NUMBER => qr/ 0x [0-9a-f]+ | 0b [01]+ \b /ix,
291             sub {[$_[0], oct(lc($_[1]))]},
292            
293             NUMBER => qr/ \d+ \b /ix, sub {[$_[0], 0+$_[1]]},
294            
295             # name or keyword, after numbers because of $FF syntax
296             NAME => qr/ af' | [a-z_]\w* | \$ /ix,
297             sub { my($t, $v) = @_;
298             my $k = lc($v);
299             $KEYWORDS{$k} ? [$k, $k] : [$t, $v];
300             },
301            
302             # symbols
303             SYMBOL => qr/ $SYMBOLS_RE | . /ix, sub {[$_[1], $_[1]]},
304              
305             );
306            
307             #------------------------------------------------------------------------------
308             # _lexer_stream(INPUT)
309             # INPUT is a Stream of $line = Asm::Preproc::Line,
310             # as returned by z80preprocessor()
311             # The result Stream contains Asm::Preproc::Token objects
312             # with token type, value, and the line where found
313             # Reserved words are returned with type = value in lower case.
314             sub _lexer_stream {
315 4583     4583   206582 my($input) = @_;
316 4583         13824 my $this_lexer = $lexer->clone; # compile $lexer only once
317 4583         341466 $this_lexer->from( $input ); # define lexer input stream
318            
319 4583         355006 return $this_lexer;
320             }
321              
322             #------------------------------------------------------------------------------
323             # _regexp(LIST)
324             # Return a regexp to match any of the strings included in LIST, as blank separated
325             # tokens
326 31     31   98 sub _regexp { my(@strings) = @_;
327 31         182 my $rt = Regexp::Trie->new;
328 31         198 for (@strings) {
329 31         125 for (split(" ", $_)) {
330 186         2699 $rt->add($_);
331             }
332             }
333 31         526 return $rt->_regexp; # case-insensitive
334             }
335              
336             #------------------------------------------------------------------------------
337             sub z80lexer {
338 4583     4583 1 34823 my(@input) = @_;
339 4583         10552 return _lexer_stream(z80preprocessor(@input));
340             }
341             #------------------------------------------------------------------------------
342              
343             =head1 SCRIPTS
344              
345             =head2 z80masm
346              
347             z80masm sourcefile [destfile]
348              
349             The L<z80masm> program, installed as part of this module, calls
350             the z80asm_file() function to assemble an input source file, generates
351             an output binary file, and produces an assembly listing on standard output.
352              
353             =cut
354              
355             #------------------------------------------------------------------------------
356              
357             =head1 SYNTAX
358              
359             =head2 Input line format
360              
361             Instructions are written in ASCII text.
362             Opcodes are separated by new-line or colon C<:> characters.
363             Comments start with C<;>.
364             Lines starting with C<#> are ignored, to handle files generated by pre-processors.
365              
366             ; comment beginning with ;
367             # comment beginning with # as first char on a line
368             [LABEL [:]] INSTRUCTION [: INSTRUCTION ...] [; optional comments]
369             LABEL [:]
370             LABEL = EXPRESSION [; ...]
371              
372             =head2 Preprocessing
373              
374             See L<Asm::Preproc>.
375              
376             =head2 Tokens
377              
378             The following tokens are returned by the stream:
379              
380             =head3 reserved words
381              
382             Asm::Preproc::Token('word', 'word', $line)
383              
384             All the reserved words and symbols are returned in lower case letters.
385              
386             =head3 strings
387              
388             Asm::Preproc::Token(STRING => $string, $line)
389              
390             Single- or double-quoted strings are accepted.
391             The quote character cannot be used inside the string.
392             The returned string has the quotes stripped.
393              
394             =head3 identifiers
395              
396             Asm::Preproc::Token(NAME => $name, $line)
397              
398             The program identifiers must start with a letter or underscore,
399             and consist solely of letters, underscores and numbers. There is a special case
400             C<$> identifier that represents the current location counter.
401              
402             Identifiers are returned with case preserved, i.e. the assembler is case-sensitive
403             for labels and case-insensitive for assembly reserved words.
404              
405             =head3 numbers
406              
407             Asm::Preproc::Token(NUMBER => $decimal_number, $line)
408              
409             Numbers are converted to decimal base from one of the following formats:
410              
411             =over 4
412              
413             =item *
414              
415             Decimal numbers are sequences of the digits 0..9, e.g. 159.
416              
417             =item *
418              
419             Hexadecimal numbers are sequences of the digits 0..9 and letters A..F,
420             prefixed by one of C<0x>, C<$> or C<#>;
421             or prefixed by a zero digit, if the number starts with a letter,
422             and suffixed by a C<H> character, e.g. 0xFA21, 0FA21H, 10H, $FA21, #FA21.
423              
424             =item *
425              
426             Binary numbers are sequences of the digits 0..1,
427             prefixed by one of C<0b> or C<%>; or suffixed by C<B>,
428             e.g. 0b10, 10B, %10.
429              
430             =back
431              
432             =head2 Z80 assembly
433              
434             See L for all allowed Z80 instructions, including
435             the undocumented Z80 opcodes and composed instructions.
436              
437             =head3 relative jumps
438              
439             The DJNZ and JR instructions take an address as their destination,
440             not an offset. If you need to use an offset, do sums on $. Note
441             that $
442             is the address of the *current* instruction. The offset needs to
443             be calculated from the address of the *next* instruction, which for
444             these instructions is always $ + 2.
445              
446             A relative jump instruction can always be used. The assembler automatically
447             replaces it with an absolute jump if the distance is too far, or if the given
448             flag is not available, e.g. C<JR PO, NN>.
449             A C<DJNZ NN> instruction is converted to C<DEC B:JP NZ, NN> if the distance is too far.
450              
451             =head3 stop
452              
453             This extra instruction (which assembles to 0xDD 0xDD 0x00) is provided
454             for the convenience of those using the L<CPU::Emulator::Z80> module.
455              
456             =head2 Pseudo-instructions
457              
458             =head3 defb
459              
460             Accepts a list of expressions, and evaluates each as a byte to load to the
461             object file.
462              
463             =head3 defw
464              
465             Accepts a list of expressions, and evaluates each as a 16-bit word to load to the
466             object file, in little-endian order.
467              
468             =head3 defm, deft
469              
470             Accepts a list of literal strings, either single- or double-quoted.
471             The quoted text can not include the quotes surrounding it or newlines.
472             The characters are loaded to the object file.
473              
474             =head3 defmz
475              
476             Same as C<defm>, but appends a zero byte as string terminator after each string.
477              
478             =head3 defm7
479              
480             Same as C<defm>, but "inverts" (i.e. bit 7 set) the last character of the string,
481             as string terminator.
482              
483             =head3 equ, =
484              
485             Labels are created having the value of the address they are created at.
486              
487             Alternatively labels may be assigned expressions by using C<equ> or C<=>. The
488             expressions use the Perl operators and can refer to other labels by name, even
489             if they are defined further on the file. The C<$> can be used in the expression
490             to represent the current location counter.
491              
492             label = $ + 8
493             otherlabel = label / 2 + 3
494              
495             =head3 org
496              
497             Tell the assembler to start building the code at this address.
498             If it is not the first instruction of the assembly, the gap to the previous
499             location counter is filled with C<$CPU::Z80::Assembler::fill_byte>.
500             If absent, defaults to 0x0000.
501              
502             =head3 include
503              
504             Recursively include another file at the current source file.
505              
506             =head2 Macros
507              
508             Macros are supported. See L<CPU::Z80::Assembler::Macro> for details.
509              
510             =head1 BUGS and FEEDBACK
511              
512             We welcome feedback about our code, including constructive criticism.
513             Bug reports should be made using L.
514              
515             =head1 SEE ALSO
516              
517             L
518             L
519             L
520              
521             =head1 AUTHORS, COPYRIGHT and LICENCE
522              
523             Copyright (c) 2008-2009,
524             David Cantrell EFE,
525             Paulo Custodio EFE
526              
527             This software is free-as-in-speech software, and may be used,
528             distributed, and modified under the terms of either the GNU
529             General Public Licence version 2 or the Artistic Licence. It's
530             up to you which one you use. The full text of the licences can
531             be found in the files GPL2.txt and ARTISTIC.txt, respectively.
532              
533             The Spectrum 48K ROM used in the test scripts is Copyright by Amstrad.
534             Amstrad have kindly given their permission for the
535             redistribution of their copyrighted material but retain that copyright
536             (see L).
537              
538             =head1 CONSPIRACY
539              
540             This software is also free-as-in-mason.
541              
542             =cut
543              
544             #------------------------------------------------------------------------------
545              
546             1;