File Coverage

blib/lib/BSON/Decode.pm
Criterion Covered Total %
statement 134 164 81.7
branch 37 50 74.0
condition 19 23 82.6
subroutine 20 27 74.0
pod 5 5 100.0
total 215 269 79.9


line stmt bran cond sub pod time code
1             package BSON::Decode;
2              
3 3     3   44554 use 5.006;
  3         6  
4 3     3   9 use strict;
  3         3  
  3         48  
5 3     3   8 use warnings;
  3         6  
  3         65  
6 3     3   8 use Carp qw(carp croak);
  3         4  
  3         4026  
7              
8             =head1 NAME
9              
10             BSON::Decode - Decode BSON file and return a perl data structure!
11              
12             =head1 VERSION
13              
14             Version 0.06
15              
16             =cut
17              
18             our $VERSION = '0.06';
19              
20             =head1 SYNOPSIS
21              
22             A small module to parse BSON file to a perl structure
23              
24             my $file = shift;
25             open( my $INF ,$file);
26             my $bs = BSON::Decode->new($INF);
27              
28             say Dumper $bs->fetch(); # read first element
29             say Dumper $bs->fetch(); # read second element
30             $bs->rewind();# rewind the filehandle
31             say Dumper $bs->fetch_all(); # read all element in an ARRAY
32             close $INF;
33              
34              
35              
36             =cut
37              
38             =head1 DESCRIPTION
39              
40             This is a pure perl BSON decoder. It is simple and without any dependencies.
41             (It was mandatory to allow use in an embeded system)
42             The main use is to compare 2 BSON files
43              
44             Some of the BSON grammar's element are not implemented
45              
46             \x06 Undefined (value) Deprecated
47             \x0C DBPointer Deprecated
48             \x0E Deprecated
49              
50             For binary content (\x05)
51             The data is uuencoded as value of the subtype.
52              
53             =cut
54              
55             =head1 METHOD
56              
57             =head2 new
58              
59             Instanciate the parser.
60              
61             By default the parser read from STDIN (unix filter).
62             In that case no rewind is possible. Only sequential read.
63             my $bs = BSON::Decode->new();
64             It is possible to pass an argument to the constructor to chose another source.
65              
66             =head3 filehandle
67              
68             an open filehandle from where to read the BSON data:
69             open( my $INF ,$file);
70             my $bs = BSON::Decode->new($INF);
71              
72             =cut
73              
74             =head3 file name
75              
76             this a path to an existing BSON file:
77             my $bs = BSON::Decode->new($file);
78              
79             =cut
80              
81             =head3 scalar
82              
83             a perl scalar with the BSON data:
84             my $bs = BSON::Decode->new($bson);
85              
86             =cut
87              
88             sub new {
89 2     2 1 61695 my $class = shift;
90              
91 2         3 my $self = {};
92 2         4 bless( $self, $class );
93 2   50     14 $self->{fh} = shift // '';
94 2         3 my $valid = 0;
95             {
96 2         3 local $@ = "";
  2         2  
97 2         4 my $fd = eval { fileno $self->{fh} };
  2         12  
98 2   33     12 $valid = !$@ && defined $fd;
99             }
100 2 50       8 if ( !$valid ) {
101 2 100       41 if ( -f $self->{fh} ) {
    50          
102 1         2 my $f_name = delete $self->{fh};
103 1         19 open $self->{fh}, '<', $f_name;
104             } elsif ( length $self->{fh} ) {
105 1         3 $self->{buffer} = $self->{buffer_bck} = delete $self->{fh};
106             } else {
107 0         0 $self->{fh} = *STDIN;
108 0         0 $self->{stdin} = 1;
109             }
110             }
111 2 100       15 binmode( $self->{fh} ) if exists $self->{fh};
112              
113             $self->{func} = {
114             1 => sub { ## 64-bit float
115 3     3   3 my ( $bson, $name ) = @_;
116 3         6 my $data = substr( $$bson, 0, 8, '' );
117 3         13 return unpack( "d", $data );
118             },
119             2 => sub { ## UTF-8 string
120 40     40   30 my ( $bson, $name ) = @_;
121 40         50 my $size = unpack( "i", substr( $$bson, 0, 4, '' ) );
122 40         52 my $data = substr( $$bson, 0, $size - 1, '' );
123              
124 40         114 return unpack( "a*", $data );
125             },
126             3 => sub { ## Embedded document
127 16     16   16 my ( $bson, $name ) = @_;
128 16         17 my $size = unpack( "i", substr( $$bson, 0, 4, '' ) );
129 16         34 return $self->_document( $bson, $size - 4 );
130             },
131             4 => sub { ## Array
132 8     8   6 my ( $bson, $name ) = @_;
133 8         10 my $size = unpack( "i", substr( $$bson, 0, 4, '' ) );
134 8         11 my $r = $self->_document( $bson, $size - 4 );
135 8         5 my @t;
136 8         21 foreach my $k ( sort keys %$r ) {
137 26 50       73 push @t, $r->{$k} if $k =~ /^\d+$/;
138             }
139 8         25 return \@t;
140             },
141             5 => sub { ## Binary data
142 0     0   0 my ( $bson, $name ) = @_;
143 0         0 my $size = unpack( "i", substr( $$bson, 0, 4, '' ) );
144 0         0 my $subtype = substr( $$bson, 0, 1, '' );
145 0         0 my $binary = pack( 'u', substr( $$bson, 0, $size, '' ) );
146 0         0 return ( $subtype => $binary );
147             },
148             7 => sub { ## ObjectId
149 9     9   14 my ( $bson, $name ) = @_;
150 9         27 my $data = substr( $$bson, 0, 12, '' );
151 9         35 return unpack( "H*", $data );
152             },
153             8 => sub { ## Boolean
154 6     6   8 my ( $bson, $name ) = @_;
155 6         8 my $data = substr( $$bson, 0, 1, '' );
156 6         22 return oct( "0x" . unpack( "H*", $data ) );
157             },
158             9 => sub { ## UTC datetime
159 5     5   5 my ( $bson, $name ) = @_;
160 5         9 my $data = substr( $$bson, 0, 8, '' );
161 5         23 return unpack( "q", $data );
162             },
163             10 => sub { ## Null value
164 0     0   0 my ( $bson, $name ) = @_;
165 0         0 return 'null';
166             },
167             11 => sub { ## REGEX
168 2     2   2 my ( $bson, $name ) = @_;
169 2         4 my $regex = unpack( "Z*", $$bson );
170 2         5 substr $$bson, 0, length($regex) + 1, '';
171 2         7 my $regex_options = unpack( "Z*", $$bson );
172 2         5 substr $$bson, 0, length($regex_options) + 1, '';
173 2         10 return '/' . $regex . '/' . $regex_options;
174             },
175             13 => sub { ## javascript code
176 0     0   0 my ( $bson, $name ) = @_;
177 0         0 my $size = unpack( "i", substr( $$bson, 0, 4, '' ) );
178 0         0 my $data = substr( $$bson, 0, $size - 1, '' );
179 0         0 return unpack( "a*", $data );
180             },
181             16 => sub { ## 32-bit integer
182 10     10   6 my ( $bson, $name ) = @_;
183 10         9 my $data = substr( $$bson, 0, 4, '' );
184 10         22 return unpack( "i", $data );
185             },
186             17 => sub { ## timestamp
187 4     4   6 my ( $bson, $name ) = @_;
188 4         8 my $data = substr( $$bson, 0, 8, '' );
189 4         15 return unpack( "Q", $data );
190             },
191             18 => sub { ## 64-bit integer
192 0     0   0 my ( $bson, $name ) = @_;
193 0         0 my $data = substr( $$bson, 0, 8, '' );
194 0         0 return unpack( "Q", $data );
195              
196             },
197             19 => sub { ## 128-bit integer
198 0     0   0 my ( $bson, $name ) = @_;
199 0         0 my $data = substr( $$bson, 0, 16, '' );
200 0         0 return unpack( "Q", $data );
201             },
202             127 => sub { ## MAX key
203 0     0   0 my ( $bson, $name ) = @_;
204 0         0 return 'maxkey';
205             },
206             255 => sub { ## MIN key
207 0     0   0 my ( $bson, $name ) = @_;
208 0         0 return 'minkey';
209             }
210 2         77 };
211 2         7 return $self;
212             }
213              
214             =head2 fetch_all
215              
216             Parse the BSON data and return an ARRAY with each element in a perl structure.
217             The parsing is done from the current position.
218             e.g.
219             my $bs = BSON::Decode->new($bson_data);
220             my $bson = $bs->fetch_all();
221              
222             If a parameter is provided, this override the parameter from the class new
223             And allow a one line fetch_all like this:
224             my $bson = BSON::Decode->fetch_all('t/test1.bson');
225              
226             =cut
227              
228             sub fetch_all {
229 3     3 1 38 my ($self, $input) = @_;
230              
231 3 50       7 $self=$self->new($input) if ($input);
232 3         4 my @all;
233 3         15 $self->{item_nbr} = 0;
234 3 100       5 if ( $self->{fh} ) {
235 2         25 push @all, $self->fetch() while ( !eof $self->{fh} );
236             } else {
237 1         5 push @all, $self->fetch() while ( $self->{buffer} );
238             }
239 3         7 return \@all;
240             }
241              
242             =head2 fetch
243              
244             Parse the BSON data and return the next element in a perl structure.
245             e.g.
246             my $bs = BSON::Decode->new($bson_data);
247             my $bson = $bs->fetch();
248              
249             =cut
250              
251             sub fetch {
252 7     7 1 490 my ($self) = @_;
253 7         8 $self->{item_nbr}++;
254 7         4 my $res;
255             my $sizebits;
256 0         0 my $bson;
257 0         0 my $size;
258 7 100       11 if ( $self->{fh} ) {
259 2         6 my $n = read( $self->{fh}, $sizebits, 4 );
260 2 50       4 carp "error reading size\n" if ( $n != 4 );
261              
262 2         5 $size = unpack( "i", $sizebits );
263 2         3 $size -= 4; # -4 because the size includes itself
264 2         3 $n = read( $self->{fh}, $bson, $size );
265 2 50       4 carp "error reading bson string\n" if ( $n != $size );
266             } else {
267              
268 5         14 $sizebits = substr( $self->{buffer}, 0, 4, '' );
269 5         11 $size = unpack( "i", $sizebits );
270 5         8 $size -= 4;
271 5         19 $bson = substr( $self->{buffer}, 0, $size, '' );
272             }
273 7 50       16 if ( length $bson ) {
274 7         9 my $sep = substr( $bson, -1, 1 );
275 7 50       16 croak( "Bad record seperator '" . unpack( "H*", $sep ) . "'" ) if ( $sep ne "\x00" );
276             }
277 7         15 $res = $self->_document( \$bson, $size );
278 7         26 return $res;
279             }
280              
281             =head2 rewind
282              
283             Rewind the file descriptor ( or buffer ).
284             !!! BUT NOT FOR STDIN !!!
285             e.g.
286             my $bs = BSON::Decode->new($file);
287             say Dumper $bs->fetch();
288              
289             $bs->rewind();
290             say Dumper $bs->fetch_all(); # all from beginning if no rewind read all remaining elements after the first one.
291              
292             =cut
293              
294             sub rewind {
295 3     3 1 2607 my ($self) = @_;
296 3 50       8 if ( $self->{stdin} ) {
297 0         0 carp("No rewind for STDIN");
298             } else {
299 3 100       5 if ( exists $self->{fh} ) {
300 1         4 seek( $self->{fh}, 0, 0 );
301             } else {
302 2         3 $self->{buffer} = $self->{buffer_bck};
303             }
304             }
305 3         5 return;
306             }
307              
308              
309             sub _document {
310 31     31   26 my ( $self, $str, $size ) = @_;
311 31         26 my $res = {};
312 31         39 my $bson = substr( $$str, 0, $size, '' );
313 31 50       41 if ( length($bson) != $size ) {
314 0         0 die "error reading bson string " . length($bson) . " != $size\n";
315             }
316 31         32 my $sep = substr( $bson, -1, 1 );
317 31 50       40 if ( $sep ne "\x00" ) {
318 0         0 die("Bad record seperator '$sep'");
319             }
320 31         41 while ( length($bson) ) {
321 174         271 my $element = oct( "0x" . unpack( "H*", substr( $bson, 0, 1, '' ) ) );
322 174         125 my $name;
323 174 100       260 next if ( $element == 0 );
324 103         117 $name = unpack( "Z*", $bson );
325 103         97 substr $bson, 0, length($name) + 1, '';
326 103 50       130 if ( exists $self->{func}{$element} ) {
327 103         132 $res->{$name} = $self->{func}{$element}->( \$bson, $name );
328             } else {
329 0         0 warn "Type $element not implemented for $name";
330 0         0 last;
331             }
332             }
333 31         48 return $res;
334             }
335              
336             =head2 delete_hash_deep
337              
338             Function to delete some keys matching a regex from a hash in deep (multiple level)
339             This allow to compare 2 BSON data but skip some fields.
340             e.g.
341             use BSON::Decode
342             use Text::Diff;
343             my @skip = ( lastUpdated$', '(?i)timestamp', '^uptimeMs$' );
344             my $codec1 = BSON::Decode->new( $file1 );
345             my $bson1 = $codec1->fetch_all();
346              
347             my $codec2 = BSON::Decode->new( $file2 );
348             my $bson2 = $codec2->fetch_all();
349              
350             delete_hash_deep( $bson1, \@skip );
351             delete_hash_deep( $bson2, \@skip );
352              
353             my $b1 = Dumper($bson1);
354             my $b2 = Dumper($bson2);
355              
356             my $diff = diff( \$b1, \$b2 );
357              
358             say "$all1{$f}; diff=<$diff>" if $diff;
359              
360              
361             !!! if the hash is empty, it stay in the perl structure
362              
363             =cut
364              
365             sub delete_hash_deep {
366 42     42 1 45 my ( $hash, $allowed_keys, $clean ) = @_;
367              
368 42 100 100     97 if ( ref($hash) && ref($hash) eq "ARRAY" ) {
369 10         17 foreach my $h ( 0 .. $#$hash ) {
370 28         42 delete_hash_deep( $hash->[$h], $allowed_keys, $clean );
371 28 100 100     70 delete $hash->[$h] if ( $clean && ref( $hash->[$h] ) eq 'HASH' && keys %{ $hash->[$h] } == 0 );
  6   100     21  
372             }
373             } else {
374 32 100       54 if ( ref($hash) eq 'HASH' ) {
375 18         10 foreach my $k ( keys %{$hash} ) {
  18         36  
376 50 100       29 if ( ( grep { $k =~ /$_/ } @{$allowed_keys} ) ) {
  150         712  
  50         45  
377 14         16 delete $hash->{$k};
378 14         19 next;
379             } else {
380 36 100       62 if ( ref( $hash->{$k} ) ) {
381 12 50 66     33 if ( ref( $hash->{$k} ) eq "HASH" || ref( $hash->{$k} ) eq "ARRAY" ) {
382 12         17 delete_hash_deep( $hash->{$k}, $allowed_keys, $clean );
383 12 100 100     35 delete $hash->{$k} if ( $clean && ref( $hash->{$k} ) eq 'ARRAY' && scalar @{ $hash->{$k} } == 0 );
  3   100     9  
384             }
385             }
386             }
387             }
388             }
389             }
390             }
391              
392             =head1 AUTHOR
393              
394             DULAUNOY Fabrice, C<< >>
395              
396             =head1 BUGS
397              
398             Please report any bugs or feature requests to C, or through
399             the web interface at L. I will be notified, and then you'll
400             automatically be notified of progress on your bug as I make changes.
401              
402              
403              
404              
405             =head1 SUPPORT
406              
407             You can find documentation for this module with the perldoc command.
408              
409             perldoc BSON::Decode
410              
411              
412             You can also look for information at:
413              
414             =over 4
415              
416             =item * RT: CPAN's request tracker (report bugs here)
417              
418             L
419              
420             =item * AnnoCPAN: Annotated CPAN documentation
421              
422             L
423              
424             =item * CPAN Ratings
425              
426             L
427              
428             =item * Search CPAN
429              
430             L
431              
432             =back
433              
434              
435             =head1 ACKNOWLEDGEMENTS
436              
437              
438             =head1 LICENSE AND COPYRIGHT
439              
440             Copyright 2016 DULAUNOY Fabrice.
441              
442             This program is free software; you can redistribute it and/or modify it
443             under the terms of the the Artistic License (2.0). You may obtain a
444             copy of the full license at:
445              
446             L
447              
448             Any use, modification, and distribution of the Standard or Modified
449             Versions is governed by this Artistic License. By using, modifying or
450             distributing the Package, you accept this license. Do not use, modify,
451             or distribute the Package, if you do not accept this license.
452              
453             If your Modified Version has been derived from a Modified Version made
454             by someone other than you, you are nevertheless required to ensure that
455             your Modified Version complies with the requirements of this license.
456              
457             This license does not grant you the right to use any trademark, service
458             mark, tradename, or logo of the Copyright Holder.
459              
460             This license includes the non-exclusive, worldwide, free-of-charge
461             patent license to make, have made, use, offer to sell, sell, import and
462             otherwise transfer the Package with respect to any patent claims
463             licensable by the Copyright Holder that are necessarily infringed by the
464             Package. If you institute patent litigation (including a cross-claim or
465             counterclaim) against any party alleging that the Package constitutes
466             direct or contributory patent infringement, then this Artistic License
467             to you shall terminate on the date that such litigation is filed.
468              
469             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
470             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
471             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
472             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
473             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
474             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
475             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
476             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
477              
478              
479             =cut
480              
481             1; # End of BSON::Decode