File Coverage

blib/lib/Data/Hexdumper.pm
Criterion Covered Total %
statement 98 98 100.0
branch 48 54 88.8
condition 32 37 86.4
subroutine 7 7 100.0
pod 1 1 100.0
total 186 197 94.4


line stmt bran cond sub pod time code
1             package Data::Hexdumper;
2              
3 3     3   75495 use strict;
  3         7  
  3         118  
4 3     3   16 use warnings;
  3         5  
  3         100  
5 3     3   15 use vars qw($VERSION @ISA @EXPORT);
  3         11  
  3         419  
6              
7             $VERSION = "3.0001";
8              
9             require Exporter;
10             @ISA = qw(Exporter);
11             @EXPORT = qw(hexdump);
12              
13 3     3   24 use constant BIGENDIAN => (unpack("h*", pack("s", 1)) =~ /01/);
  3         7  
  3         389  
14 3     3   16 use constant LITTLEENDIAN => (unpack("h*", pack("s", 1)) =~ /^1/);
  3         6  
  3         4698  
15              
16             # static data, tells us the length of each type of word
17             my %num_bytes=(
18             '%C' => 1, # unsigned char
19             '%S' => 2, # unsigned 16-bit
20             '%L' => 4, # unsigned 32-bit
21             '%L<' => 4, # unsigned 32-bit, little-endian
22             '%L>' => 4, # unsigned 32-bit, big-endian
23             '%V' => 4, # unsigned 32-bit, little-endian
24             '%N' => 4, # unsigned 32-bit, big-endian
25             '%S<' => 2, # unsigned 16-bit, little-endian
26             '%S>' => 2, # unsigned 16-bit, big-endian
27             '%v' => 2, # unsigned 16-bit, little-endian
28             '%n' => 2, # unsigned 16-bit, big-endian
29             '%Q' => 8, # unsigned 64-bit
30             '%Q<' => 8, # unsigned 64-bit, little-endian
31             '%Q>' => 8, # unsigned 64-bit, big-endian
32             );
33              
34             my %number_format_to_new_format = (
35             'C' => ' %4a : %16C : %d',
36             'S' => ' %4a : %8S : %d',
37             'S<' => ' %4a : %8S< : %d',
38             'S>' => ' %4a : %8S> : %d',
39             'L' => ' %4a : %4L : %d',
40             'L<' => ' %4a : %4L< : %d',
41             'L>' => ' %4a : %4L> : %d',
42             'Q' => ' %4a : %2Q : %d',
43             'Q<' => ' %4a : %2Q< : %d',
44             'Q>' => ' %4a : %2Q> : %d',
45             );
46              
47             =head1 NAME
48              
49             Data::Hexdumper - Make binary data human-readable
50              
51             =head1 SYNOPSIS
52              
53             use Data::Hexdumper qw(hexdump);
54             print hexdump(
55             data => $data, # what to dump
56             # NB number_format is deprecated
57             number_format => 'S', # display as unsigned 'shorts'
58             start_position => 100, # start at this offset ...
59             end_position => 148 # ... and end at this offset
60             );
61             print hexdump(
62             "abcdefg",
63             { output_format => '%4a : %C %S< %L> : %d' }
64             );
65              
66             =head1 DESCRIPTION
67              
68             C provides a simple way to format arbitrary binary data
69             into a nice human-readable format, somewhat similar to the Unix 'hexdump'
70             utility.
71              
72             It gives the programmer a considerable degree of flexibility in how the
73             data is formatted, with sensible defaults. It is envisaged that it will
74             primarily be of use for those wrestling alligators in the swamp of binary
75             file formats, which is why it was written in the first place.
76              
77             =head1 SUBROUTINES
78              
79             The following subroutines are exported by default, although this is
80             deprecated and will be removed in some future version. Please pretend
81             that you need to ask the module to export them to you.
82              
83             If you do assume that the module will always export them, then you may
84             also assume that your code will break at some point after 1 Aug 2012.
85              
86             =head2 hexdump
87              
88             Does everything. Takes a hash of parameters, one of which is mandatory,
89             the rest having sensible defaults if not specified. Available parameters
90             are:
91              
92             =over
93              
94             =item data
95              
96             A scalar containing the binary data we're interested in. This is
97             mandatory.
98              
99             =item start_position
100              
101             An integer telling us where in C to start dumping. Defaults to the
102             beginning of C.
103              
104             =item end_position
105              
106             An integer telling us where in C to stop dumping. Defaults to the
107             end of C.
108              
109             =item number_format
110              
111             This is deprecated. See 'INCOMPATIBLE CHANGES' below. If you use this
112             your data will be padded with NULLs to be an integer multiple of 16 bytes.
113             You can expect number_format to be removed at some point in 2014 or later.
114              
115             A string specifying how to format the data. It can be any of the following,
116             which you will notice have the same meanings as they do to perl's C
117             function:
118              
119             =over
120              
121             =item C - unsigned char
122              
123             =item S - unsigned 16-bit, native endianness
124              
125             =item v or SE - unsigned 16-bit, little-endian
126              
127             =item n or SE - unsigned 16-bit, big-endian
128              
129             =item L - unsigned 32-bit, native endianness
130              
131             =item V or LE - unsigned 32-bit, little-endian
132              
133             =item N or LE - unsigned 32-bit, big-endian
134              
135             =item Q - unsigned 64-bit, native endianness
136              
137             =item QE - unsigned 64-bit, little-endian
138              
139             =item QE - unsigned 64-bit, big-endian
140              
141             =back
142              
143             Note that 64-bit formats are *always* available,
144             even if your perl is only 32-bit. Similarly, using E and E on
145             the S and L formats always works, even if you're using a pre 5.10.0 perl.
146             That's because this code doesn't use C.
147              
148             =item output_format
149              
150             This is an alternative and much more flexible (but more complex) method
151             of specifying the output format. Instead of specifying a single format
152             for all your output, you can specify formats like:
153              
154             %4a : %C %S %L> %Q : %d
155              
156             which will, on each line, display first the address (consisting of '0x'
157             and 4 hexadecimal digits, zero-padded if necessary), then a space, then
158             a colon, then a single byte of data, then a space, then an unsigned
159             16-bit value in native endianness, then a space, then an unsigned 32-bit
160             big-endian value, ... then a colon,
161             a space, then the characters representing your 15 byte record.
162              
163             You can use exactly the same characters and character sequences as are
164             specified above for number_format, plus 'a' for the address, and 'd'
165             for the data. To output a literal % character, use %% as is normal
166             with formats - see sprintf for details. To output a literal E or E
167             character where it may be confused with any of the {S,L,Q}{E,E}
168             sequences, use %E or %E. So, for example, to output a 16-bit
169             value in native endianness followed by <, use %S%<.
170              
171             %a takes an optional base-ten number between the % and the a signifying
172             the number of hexadecimal digits. This defaults to 4.
173              
174             %{C,S,L,Q} also take an optional base-ten number between the % and the letter,
175             signifying the number of repeats. These will be separated by spaces in
176             the output. So '%4C' is equivalent to '%C %C %C %C'.
177              
178             Anything else will get printed literally. This format
179             will be repeated for as many lines as necessary. If the amount of data
180             isn't enough to completely fill the last line, it will be padded with
181             NULL bytes.
182              
183             To specify both number_format and output_format is a fatal error.
184              
185             If neither are given, output_format defaults to:
186              
187             ' %4a : %16C : %d'
188              
189             which is equivalent to the old-style:
190              
191             number_format => 'C'
192              
193             =item suppress_warnings
194              
195             Make this true if you want to suppress any warnings - such as that your
196             data may have been padded with NULLs if it didn't exactly fit into an
197             integer number of words, or if you do something that is deprecated.
198              
199             =item space_as_space
200              
201             Make this true if you want spaces (ASCII character 0x20) to be printed as
202             spaces Otherwise, spaces will be printed as full stops / periods (ASCII
203             0x2E).
204              
205             =back
206              
207             Alternatively, you can supply the parameters as a scalar chunk of data
208             followed by an optional hashref of the other options:
209              
210             $results = hexdump($string);
211             $results = hexdump(
212             $string,
213             { start_position => 100, end_position => 148 }
214             );
215              
216             =cut
217              
218             sub hexdump {
219 50     50 1 5791 my @params = @_;
220             # first let's see if we need to massage the data into canonical form ...
221 50 100 100     652 if($#params == 0) { # one param: hexdump($string)
    100          
222 1         5 @params = (data => $params[0]);
223             } elsif($#params == 1 && ref($params[1])) { # two: hexdump($foo, {...})
224 6         37 @params = (
225             data => $params[0],
226 6         13 %{$params[1]}
227             )
228             }
229              
230 50         277 my %params=@params;
231 50         150 my($data, $number_format, $output_format, $start_position, $end_position)=
232             @params{qw(data number_format output_format start_position end_position)};
233              
234 50 100 100     170 die("can't have both number_format and output_format\n")
235             if($output_format && $number_format);
236 49   50     221 my $addr = $start_position ||= 0;
237 49   100     171 $end_position ||= length($data)-1;
238 49 100       115 if(!$output_format) {
239             # $output_format = ' %a : %C %C %C %C %C %C %C %C %C %C %C %C %C %C %C %C : %d';
240 41 50 66     180 warn("Data::Hexdumper: number_format is deprecated\n")
241             if($number_format && !$params{suppress_warnings});
242 41   100     89 $number_format ||= 'C';
243 41 100       87 if($number_format eq 'V') { $number_format = 'L<'; }
  3         6  
244 41 100       79 if($number_format eq 'N') { $number_format = 'L>'; }
  11         17  
245 41 100       83 if($number_format eq 'v') { $number_format = 'S<'; }
  3         6  
246 41 100       107 if($number_format eq 'n') { $number_format = 'S>'; }
  6         8  
247 41   100     1214 $output_format = $number_format_to_new_format{$number_format} ||
248             die("number_format not recognised\n");
249             }
250              
251 48         1009 my @format_elements_raw = split(//, $output_format);
252 48         183 my @format_elements;
253 48         122 while(@format_elements_raw) {
254 900         1161 push @format_elements, shift(@format_elements_raw);
255 900 100       3339 if($format_elements[-1] eq '%') {
256 165   66     1520 while(exists($format_elements_raw[0]) && $format_elements_raw[0] =~ /\d/) {
257 95         773 $format_elements[-1] .= shift(@format_elements_raw);
258             }
259 165 100 66     908 if(exists($format_elements_raw[0]) && $format_elements_raw[0] =~ /[adCSLQ%<>]/) {
260 164         312 $format_elements[-1] .= shift(@format_elements_raw);
261             }
262 165 100 100     1456 if($format_elements[-1] =~ /%([%<>])/) { $format_elements[-1] = $1 }
  4 100 100     12  
263             elsif($format_elements[-1] =~ /%\d*[QSL]/ &&
264             exists($format_elements_raw[0]) &&
265             $format_elements_raw[0] =~ /[<>]/
266 45         137 ) { $format_elements[-1] .= shift(@format_elements_raw); }
267             }
268             }
269              
270 900         1519 @format_elements = map {
271 48         93 my $format = $_;
272 900         1520 my @r;
273 900 100       3034 if($format =~ /^([^%]|%\d*a|%\D|%$)/) { push @r, $format; }
  856         1624  
274             else {
275 44         338 $format =~ /^%(\d+)(.*)/;
276 44         301 push @r, ('%'.$2, ' ') x $1;
277 44         56 pop @r; # get rid of the last space
278             }
279 900         3794 @r;
280             } @format_elements;
281              
282 48         184 my $chunk_length = 0;
283 48         80 foreach my $format (grep { /^%[CSLQ]/ } @format_elements) {
  1360         4192  
284 300         575 $chunk_length += $num_bytes{$format};
285             }
286              
287             # sanity-check the parameters
288 48 50       137 die("No data given to hexdump.") unless length($data);
289 48 50       189 die("start_position must be numeric.") if($start_position=~/\D/);
290 48 50       134 die("end_position must be numeric.") if($end_position=~/\D/);
291 48 50       844 die("end_position must not be before start_position.")
292             if($end_position < $start_position);
293              
294             # extract the required range and pad end with NULLs if necessary
295              
296 48         111 $data=substr($data, $start_position, 1+$end_position-$start_position);
297 48 100       240 if(length($data) / $chunk_length != int(length($data) / $chunk_length)) {
298 10 50       26 warn "Data::Hexdumper: data length isn't an integer multiple of lines\n".
299             "so has been padded with NULLs at the end.\n"
300             unless($params{suppress_warnings});
301 10         31 $data .= pack('C', 0) x ($chunk_length - length($data) + int(length($data)/$chunk_length)*$chunk_length);
302             }
303              
304 48         65 my $output=''; # where we put the formatted results
305              
306 48         133 while(length($data)) {
307             # Get a chunk
308 81         154 my $chunk = substr($data, 0, $chunk_length);
309 81 100       433 $data = ($chunk eq $data) ? '' : substr($data, $chunk_length);
310              
311 81         104 my $characters = $chunk;
312             # replace any non-printable character with .
313 81 100       152 if($params{space_as_space}) {
314 6         239 $characters =~ s/[^a-z0-9\\|,.<>;:'\@[{\]}#`!"\$%^&*()_+=~?\/ -]/./gi;
315             } else {
316 75         7522 $characters =~ s/[^a-z0-9\\|,.<>;:'\@[{\]}#`!"\$%^&*()_+=~?\/-]/./gi;
317             }
318              
319 81         186 foreach my $format (@format_elements) {
320 2357 100       6565 if(length($format) == 1) { # pass straight through
    100          
    100          
321 1708         9514 $output .= $format;
322             } elsif($format =~ /%(\d*)a/) { # address
323 81   100     252 my $nibbles = $1 || 4;
324 81         283 $output .= sprintf("0x%0${nibbles}X", $addr);
325             } elsif($format eq '%d') { # data
326 74         198 $output .= $characters;
327             } else {
328 494         863 my $word = substr($chunk, 0, $num_bytes{$format});
329 494 100       1549 if(length($chunk) > $num_bytes{$format}) {
330 413         1293 $chunk = substr($chunk, $num_bytes{$format});
331 81         115 } else { $chunk = ''; }
332 494         882 $output .= _format_word($format, $word);
333             }
334             }
335 81         116 $output .= "\n";
336 81         199 $addr += $chunk_length;
337             }
338 48         617 $output;
339             }
340              
341             sub _format_word {
342 494     494   789 my($format, $data) = @_;
343              
344             # big endian
345 494         1031 my @bytes = map { ord($_) } split(//, $data);
  1284         2536  
346             # make little endian if necessary
347 494 100 100     3159 @bytes = reverse(@bytes)
      66        
348             if($format =~ // && LITTLEENDIAN));
349 494         682 return join('', map { sprintf('%02X', $_) } @bytes);
  1284         4656  
350             }
351              
352             =head1 SEE ALSO
353              
354             L
355              
356             L if your needs are simple
357              
358             perldoc -f unpack
359              
360             perldoc -f pack
361              
362             =head1 INCOMPATIBLE CHANGES
363              
364             'number_format' is now implemented in terms of 'output_format'. Your data
365             will be padded to a multiple of 16 bytes. Previously-silent code may now
366             emit warnings.
367              
368             The mappings are:
369              
370             'C' => ' %4a : %C %C %C %C %C %C %C %C %C %C %C %C %C %C %C %C : %d'
371             'S' => ' %4a : %S %S %S %S %S %S %S %S : %d'
372             'S<' => ' %4a : %S< %S< %S< %S< %S< %S< %S< %S< : %d'
373             'S>' => ' %4a : %S> %S> %S> %S> %S> %S> %S> %S> : %d'
374             'L' => ' %4a : %L %L %L %L : %d'
375             'L<' => ' %4a : %L< %L< %L< %L< : %d'
376             'L>' => ' %4a : %L> %L> %L> %L> : %d'
377             'Q' => ' %4a : %Q %Q : %d'
378             'Q<' => ' %4a : %Q< %Q< : %d'
379             'Q>' => ' %4a : %Q> %Q> : %d'
380              
381             and of course:
382              
383             'V' => 'L<'
384             'N' => 'L>'
385             'v' => 'S<'
386             'n' => 'S>'
387              
388             =head1 BUGS/LIMITATIONS
389              
390             Behaviour of %a is not defined if your file is too big.
391              
392             Behaviour of %NNa is not defined if NN is too big for your sprintf implementation
393             to handle 0x%0${NN}X.
394              
395             =head1 FEEDBACK
396              
397             I welcome constructive criticism and bug reports. Please report bugs either
398             by email or via RT:
399             L
400              
401             The best bug reports contain a test file that fails with the current
402             code, and will pass once it has been fixed. The code repository
403             is on Github:
404             L
405              
406             =head1 AUTHOR, COPYRIGHT and LICENCE
407              
408             Copyright 2001 - 2012 David Cantrell EFE
409              
410             This software is free-as-in-speech software, and may be used,
411             distributed, and modified under the terms of either the GNU
412             General Public Licence version 2 or the Artistic Licence. It's
413             up to you which one you use. The full text of the licences can
414             be found in the files GPL2.txt and ARTISTIC.txt, respectively.
415              
416             =head1 CONSPIRACY
417              
418             This module is also free-as-in-mason software.
419              
420             =head1 THANKS TO ...
421              
422             MHX, for reporting a bug when dumping a single byte of data
423              
424             Stefan Siegl, for reporting a bug when dumping an ASCII 0
425              
426             Steffen Winkler, for inspiring me to use proper output formats
427              
428             =cut
429              
430             1;