File Coverage

blib/lib/Linux/SocketFilter/Assembler.pm
Criterion Covered Total %
statement 54 95 56.8
branch 32 56 57.1
condition 0 3 0.0
subroutine 13 30 43.3
pod 1 25 4.0
total 100 209 47.8


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2010 -- leonerd@leonerd.org.uk
5              
6             package Linux::SocketFilter::Assembler;
7              
8 2     2   42068 use strict;
  2         5  
  2         67  
9 2     2   7 use warnings;
  2         5  
  2         94  
10              
11             our $VERSION = '0.04';
12              
13 2     2   593 use Linux::SocketFilter qw( :bpf :skf pack_sock_filter );
  2         4  
  2         563  
14              
15 2     2   8 use Exporter 'import';
  2         4  
  2         4001  
16             our @EXPORT_OK = qw(
17             assemble
18             );
19              
20             =head1 NAME
21              
22             C - assemble BPF programs from textual code
23              
24             =head1 SYNOPSIS
25              
26             use Linux::SocketFilter;
27             use Linux::SocketFilter::Assembler qw( assemble );
28             use IO::Socket::Packet;
29             use Socket qw( SOCK_DGRAM );
30              
31             my $sock = IO::Socket::Packet->new(
32             IfIndex => 0,
33             Type => SOCK_DGRAM,
34             ) or die "Cannot socket - $!";
35              
36             $sock->attach_filter( assemble( <<"EOF" ) );
37             LD AD[PROTOCOL]
38              
39             JEQ 0x0800, 0, 1
40             RET 20
41              
42             JEQ 0x86dd, 0, 1
43             RET 40
44              
45             RET 0
46             EOF
47              
48             while( my $addr = $sock->recv( my $buffer, 40 ) ) {
49             printf "Packet: %v02x\n", $buffer;
50             }
51              
52             =head1 DESCRIPTION
53              
54             Linux sockets allow a filter to be attached, which determines which packets
55             will be allowed through, and which to block. They are most often used on
56             C sockets when used to capture network traffic, as a filter to
57             determine the traffic of interest to the capturing application. By running
58             directly in the kernel, the filter can discard all, or most, of the traffic
59             that is not interesting to the application, allowing higher performance due
60             to reduced context switches between kernel and userland.
61              
62             This module allows filter programs to be written in textual code, and
63             assembled into a binary filter, to attach to the socket using the
64             C socket option.
65              
66             =cut
67              
68             =head1 FILTER MACHINE
69              
70             The virtual machine on which these programs run is a simple load/store
71             register machine operating on 32-bit words. It has one general-purpose
72             accumulator register (C) and one special purpose index register (C).
73             It has a number of temporary storage locations, called scratchpads (C).
74             It is given read access to the contents of the packet to be filtered in 8-bit
75             (C), 16-bit (C) or 32-bit (C) sized quantities. It
76             also has an implicit program counter, though direct access to it is not
77             provided.
78              
79             The filter program is run by the kernel on every packet captured by the socket
80             to which it is attached. It can inspect data in the packet and certain other
81             items of metadata concerning the packet, and decide if this packet should be
82             accepted by the capture socket. It returns the number of bytes to capture if
83             it should be captured, or zero to indicate this packet should be ignored. It
84             starts on the first instruction, and proceeds forwards, unless the flow is
85             modified by a jump instruction. The program terminates on a C
86             instruction, which informs the kernel of the required fate of the packet. The
87             last instruction in the filter must therefore be a C instruction; though
88             others may appear at earlier points.
89              
90             In order to guarantee termination of the program in all circumstances, the
91             virtual machine is not fully Turing-powerful. All jumps, conditional or
92             unconditional, may only jump forwards in the program. It is not possible to
93             construct a loop of instructions that executes repeatedly.
94              
95             =cut
96              
97             =head1 FUNCTIONS
98              
99             =cut
100              
101             =head2 $filter = assemble( $text )
102              
103             Takes a program (fragment) in text form and returns a binary string
104             representing the instructions packed ready for C.
105              
106             The program consists of C<\n>-separated lines of instructions or comments.
107             Leading whitespace is ignored. Blank lines are ignored. Lines beginning with
108             a C<;> (after whitespace) are ignored as comments.
109              
110             =cut
111              
112             sub assemble
113             {
114 24     24 1 17058 my $self = __PACKAGE__;
115 24         41 my ( $text ) = @_;
116              
117 24         28 my $ret = "";
118              
119 24         77 foreach ( split m/\n/, $text ) {
120 24         96 s/^\s+//; # trim whitespace
121 24 50       65 next if m/^$/; # skip blanks
122 24 100       54 next if m/^;/; # skip comments
123              
124 23         64 my ( $op, $args ) = split ' ', $_, 2;
125 23 100       86 my @args = defined $args ? split m/,\s*/, $args : ();
126              
127 23 50       182 $self->can( "assemble_$op" ) or
128             die "Can't compile $_ - unrecognised op '$op'\n";
129              
130 23         41 $ret .= $self->${\"assemble_$op"}( @args );
  23         89  
131             }
132              
133 24         101 return $ret;
134             }
135              
136             =head1 INSTRUCTION FORMAT
137              
138             Each instruction in the program is formed of an opcode followed by its
139             operands. Where numeric literals are involved, they may be given in decimal,
140             hexadecimal, or octal form. Literals will be notated as C in the
141             following descriptions.
142              
143             =cut
144              
145             my $match_literal = qr/-?(?:\d+|0x[0-9a-f]+)/;
146             sub _parse_literal
147             {
148 17     17   39 my ( $lit ) = @_;
149              
150 17 50       37 my $sign = ( $lit =~ s/^-// ) ? -1 : 1;
151              
152 17 100       65 return $sign * oct( $lit ) if $lit =~ m/^0x?/; # oct can manage octal or hex
153 11 50       77 return $sign * int( $lit ) if $lit =~ m/\d+/;
154              
155 0         0 die "Cannot parse literal $lit\n";
156             }
157              
158             =pod
159              
160             LD BYTE[addr]
161             LD HALF[addr]
162             LD WORD[addr]
163              
164             Load the C register from the 8, 16, or 32-bit quantity in the packet buffer
165             at the address. The address may be given in the forms
166              
167             lit
168             X+lit
169             NET+lit
170             NET+X+lit
171              
172             To load from an immediate or C-index address, starting from either the
173             beginning of the buffer, or the beginning of the network header, respectively.
174              
175             LD len
176              
177             Load the C register with the length of the packet.
178              
179             LD lit
180              
181             Load the C register with a literal value
182              
183             LD M[lit]
184              
185             Load the C register with the value from the given scratchpad cell
186              
187             LD X
188             TXA
189              
190             Load the C register with the value from the C register. (These two
191             instructions are synonymous)
192              
193             LD AD[name]
194              
195             Load the C register with a value from the packet auxiliary data area. The
196             following data points are available.
197              
198             =over 4
199              
200             =over 4
201              
202             =item PROTOCOL
203              
204             The ethertype protocol number of the packet
205              
206             =item PKTTYPE
207              
208             The type of the packet; see the C constants defined in
209             L.
210              
211             =item IFINDEX
212              
213             The index of the interface the packet was received on or transmitted from.
214              
215             =back
216              
217             =back
218              
219             =cut
220              
221             my %auxdata_offsets = (
222             PROTOCOL => SKF_AD_PROTOCOL,
223             PKTTYPE => SKF_AD_PKTTYPE,
224             IFINDEX => SKF_AD_IFINDEX,
225             );
226              
227             sub assemble_LD
228             {
229 16     16 0 25 my ( undef, $src ) = @_;
230              
231 16         20 my $code = BPF_LD;
232              
233 16 100 0     278 if( $src =~ m/^(BYTE|HALF|WORD)\[(NET\+)?(X\+)?($match_literal)]$/ ) {
    50          
    100          
    50          
    50          
    0          
234 8         20 my ( $size, $net, $x, $offs ) = ( $1, $2, $3, _parse_literal($4) );
235              
236 8 100       23 $code |= ( $size eq "BYTE" ) ? BPF_B :
    100          
237             ( $size eq "HALF" ) ? BPF_H :
238             BPF_W;
239 8 100       16 $code |= ( $x ) ? BPF_IND :
240             BPF_ABS;
241              
242 8 100       15 $offs += SKF_NET_OFF if $net;
243              
244 8         22 pack_sock_filter( $code, 0, 0, $offs );
245             }
246             elsif( $src eq "len" ) {
247 0         0 pack_sock_filter( $code|BPF_W|BPF_LEN, 0, 0, 0 );
248             }
249             elsif( $src =~ m/^$match_literal$/ ) {
250 7         19 pack_sock_filter( $code|BPF_IMM, 0, 0, _parse_literal($src) );
251             }
252             elsif( $src =~ m/^M\[($match_literal)\]$/ ) {
253 0         0 pack_sock_filter( $code|BPF_MEM, 0, 0, _parse_literal($1) );
254             }
255             elsif( $src eq "X" ) {
256 1         5 pack_sock_filter( BPF_MISC|BPF_TXA, 0, 0, 0 );
257             }
258             elsif( $src =~ m/^AD\[(.*)\]$/ and exists $auxdata_offsets{$1} ) {
259 0         0 pack_sock_filter( $code|BPF_W|BPF_ABS, 0, 0, SKF_AD_OFF + $auxdata_offsets{$1} );
260             }
261             else {
262 0         0 die "Unrecognised instruction LD $src\n";
263             }
264             }
265              
266 1     1 0 15 sub assemble_TXA { pack_sock_filter( BPF_MISC|BPF_TXA, 0, 0, 0 ) }
267              
268             =pod
269              
270             LDX lit
271              
272             Load the C register with a literal value
273              
274             LDX M[lit]
275              
276             Load the C register with the value from the given scratchpad cell
277              
278             LDX A
279             TAX
280              
281             Load the C register with the value from the C register. (These two
282             instructions are synonymous)
283              
284             =cut
285              
286             sub assemble_LDX
287             {
288 2     2 0 6 my ( undef, $src ) = @_;
289              
290 2         5 my $code = BPF_LDX;
291              
292 2 100       99 if( $src =~ m/^$match_literal$/ ) {
    50          
    50          
293 1         5 pack_sock_filter( $code|BPF_IMM, 0, 0, _parse_literal($src) );
294             }
295             elsif( $src =~ m/^M\[($match_literal)\]$/ ) {
296 0         0 pack_sock_filter( $code|BPF_MEM, 0, 0, _parse_literal($1) );
297             }
298             elsif( $src eq "A" ) {
299 1         5 pack_sock_filter( BPF_MISC|BPF_TAX, 0, 0, 0 );
300             }
301             else {
302 0         0 die "Unrecognised instruction LDX $src\n";
303             }
304             }
305              
306 1     1 0 5 sub assemble_TAX { pack_sock_filter( BPF_MISC|BPF_TAX, 0, 0, 0 ) }
307              
308             =pod
309              
310             LDMSHX BYTE[lit]
311              
312             Load the C register with a value obtained from a byte in the packet masked
313             and shifted (hence the name). The byte at the literal address is masked by
314             C<0x0f> to obtain the lower 4 bits, then shifted 2 bits upwards. This
315             special-purpose instruction loads the C register with the size, in bytes,
316             of an IPv4 header beginning at the given literal address.
317              
318             =cut
319              
320             sub assemble_LDMSHX
321             {
322 0     0 0 0 my ( undef, $src ) = @_;
323              
324 0 0       0 if( $src =~ m/^BYTE\[($match_literal)\]$/ ) {
325 0         0 pack_sock_filter( BPF_LDX|BPF_MSH|BPF_B, 0, 0, _parse_literal($1) );
326             }
327             else {
328 0         0 die "Unrecognised instruction LDMSHX $src\n";
329             }
330             }
331              
332             =pod
333              
334             ST M[lit]
335              
336             Store the value of the C register into the given scratchpad cell
337              
338             STX M[lit]
339              
340             Store the value of the C register into the given scratchpad cell
341              
342             =cut
343              
344 0     0 0 0 sub assemble_ST { shift->assemble_store( BPF_ST, @_ ) }
345 0     0 0 0 sub assemble_STX { shift->assemble_store( BPF_STX, @_ ) }
346             sub assemble_store
347             {
348 0     0 0 0 my ( undef, $code, $dest ) = @_;
349              
350 0 0       0 if( $dest =~ m/^M\[($match_literal)\]$/ ) {
351 0         0 pack_sock_filter( $code, 0, 0, _parse_literal($1) );
352             }
353             else {
354 0         0 die "Unrecognised instruction ST(X?) $dest\n";
355             }
356             }
357              
358             =pod
359              
360             ADD src # A = A + src
361             SUB src # A = A - src
362             MUL src # A = A * src
363             DIV src # A = A / src
364             AND src # A = A & src
365             OR src # A = A | src
366             LSH src # A = A << src
367             RSH src # A = A >> src
368              
369             Perform arithmetic or bitwise operations. In each case, the operands are the
370             C register and the given source, which can be either the C register or
371             a literal. The result is stored in the C register.
372              
373             =cut
374              
375 0     0 0 0 sub assemble_ADD { shift->assemble_alu( BPF_ADD, @_ ) }
376 0     0 0 0 sub assemble_SUB { shift->assemble_alu( BPF_SUB, @_ ) }
377 0     0 0 0 sub assemble_MUL { shift->assemble_alu( BPF_MUL, @_ ) }
378 0     0 0 0 sub assemble_DIV { shift->assemble_alu( BPF_DIV, @_ ) }
379 0     0 0 0 sub assemble_AND { shift->assemble_alu( BPF_AND, @_ ) }
380 0     0 0 0 sub assemble_OR { shift->assemble_alu( BPF_OR, @_ ) }
381 0     0 0 0 sub assemble_LSH { shift->assemble_alu( BPF_LSH, @_ ) }
382 0     0 0 0 sub assemble_RSH { shift->assemble_alu( BPF_RSH, @_ ) }
383             sub assemble_alu
384             {
385 0     0 0 0 my ( undef, $code, $val ) = @_;
386              
387 0         0 $code |= BPF_ALU;
388 0 0       0 if( $val eq "X" ) {
    0          
389 0         0 pack_sock_filter( $code|BPF_X, 0, 0, 0 );
390             }
391             elsif( $val =~ m/^$match_literal$/ ) {
392 0         0 pack_sock_filter( $code|BPF_K, 0, 0, _parse_literal($val) );
393             }
394             else {
395 0         0 die "Unrecognised alu instruction on $val\n";
396             }
397             }
398              
399             =pod
400              
401             JGT src, jt, jf # test if A > src
402             JGE src, jt, jf # test if A >= src
403             JEQ src, jt, jf # test if A == src
404             JSET src, jt, jf # test if A & src is non-zero
405              
406             Jump conditionally based on comparisons between the C register and the
407             given source, which is either the C register or a literal. If the
408             comparison is true, the C branch is taken; if false the C. Each branch
409             is a numeric count of the number of instructions to skip forwards.
410              
411             =cut
412              
413 2     2 0 11 sub assemble_JGT { shift->assemble_jmp( BPF_JGT, @_ ) }
414 0     0 0 0 sub assemble_JGE { shift->assemble_jmp( BPF_JGE, @_ ) }
415 0     0 0 0 sub assemble_JSET { shift->assemble_jmp( BPF_JSET, @_ ) }
416 0     0 0 0 sub assemble_JEQ { shift->assemble_jmp( BPF_JEQ, @_ ) }
417             sub assemble_jmp
418             {
419 2     2 0 6 my ( undef, $code, $val, $jt, $jf ) = @_;
420              
421 2         4 $code |= BPF_JMP;
422 2 100       65 if( $val eq "X" ) {
    50          
423 1         6 pack_sock_filter( $code|BPF_X, $jt, $jf, 0 );
424             }
425             elsif( $val =~ m/^$match_literal$/ ) {
426 1         5 pack_sock_filter( $code|BPF_K, $jt, $jf, _parse_literal($val) );
427             }
428             else {
429 0         0 die "Unrecognised jmp instruction on $val\n";
430             }
431             }
432              
433             =pod
434              
435             JA jmp
436              
437             Jump unconditionally forward by the given number of instructions.
438              
439             =cut
440              
441             sub assemble_JA
442             {
443 1     1 0 5 my ( undef, $target ) = @_;
444 1         6 pack_sock_filter( BPF_JMP, 0, 0, $target+0 );
445             }
446              
447             =pod
448              
449             RET lit
450              
451             Terminate the filter program and return the literal value to the kernel.
452              
453             RET A
454              
455             Terminate the filter program and return the value of the C register to the
456             kernel.
457              
458             =cut
459              
460             sub assemble_RET
461             {
462 0     0 0   my ( undef, $val ) = @_;
463              
464 0           my $code = BPF_RET;
465              
466 0 0         if( $val =~ m/^$match_literal$/ ) {
    0          
467 0           pack_sock_filter( $code|BPF_K, 0, 0, _parse_literal($val) );
468             }
469             elsif( $val eq "A" ) {
470 0           pack_sock_filter( $code|BPF_A, 0, 0, 0 );
471             }
472             else {
473 0           die "Unrecognised instruction RET $val\n";
474             }
475             }
476              
477             # Keep perl happy; keep Britain tidy
478             1;
479              
480             =head1 AUTHOR
481              
482             Paul Evans